Hi Vladislav,

This looks good to me. I think documentation should be adjusted to not
explain in detail ID allocations for VXLAN in two separate places. Assuming
the documentation is de-duplicated and typos Dumitru spotted are addressed,

Acked-by: Ihar Hrachyshka <ihrac...@redhat.com>

On Thu, Apr 4, 2024 at 2:41 PM Vladislav Odintsov <odiv...@gmail.com> wrote:

> Commit [1] introduced a "vxlan mode" concept.  It brought a limitation
> for available tunnel IDs because of lack of space in VXLAN VNI.
> In vxlan mode OVN is limited by 4095 datapaths (LRs or non-transit LSs)
> and 2047 logical switch ports per datapath.
>
> Prior to this patch vxlan mode was enabled automatically if at least one
> chassis had encap of vxlan type.  In scenarios where one want to use VXLAN
> only for HW VTEP (RAMP) switch, such limitation makes no sence.
>
> This patch adds support for explicit disabling of vxlan mode via
> Northbound database.
>
> 1: https://github.com/ovn-org/ovn/commit/b07f1bc3d068
>
> CC: Ihar Hrachyshka <ihrac...@redhat.com>
> Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath bindings")
> Signed-off-by: Vladislav Odintsov <odiv...@gmail.com>
> ---
>  NEWS                      |  3 ++
>  northd/en-global-config.c |  9 +++-
>  northd/northd.c           | 90 ++++++++++++++++++---------------------
>  northd/northd.h           |  6 ++-
>  ovn-nb.xml                | 12 ++++++
>  tests/ovn-northd.at       | 29 +++++++++++++
>  6 files changed, 97 insertions(+), 52 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 141f1831c..fe95391cd 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -13,6 +13,9 @@ Post v24.03.0
>      "lflow-stage-to-oftable STAGE_NAME" that converts stage name into
> OpenFlow
>      table id.
>    - Rename the ovs-sandbox script to ovn-sandbox.
> +  - Added new global config option NB_Global:options:disable_vxlan_mode to
> +    extend available tunnel IDs space for datapaths from 4095 to 16711680.
> +    For more details see man ovn-nb for mentioned option.
>

Nit: it's customary to refer to man pages with the section number included,
e.g. `ovn-nb(5)`.



>
>  OVN v24.03.0 - 01 Mar 2024
>  --------------------------
> diff --git a/northd/en-global-config.c b/northd/en-global-config.c
> index 34e393b33..9310c4575 100644
> --- a/northd/en-global-config.c
> +++ b/northd/en-global-config.c
> @@ -115,8 +115,8 @@ en_global_config_run(struct engine_node *node , void
> *data)
>                       config_data->svc_monitor_mac);
>      }
>
> -    char *max_tunid = xasprintf("%d",
> -        get_ovn_max_dp_key_local(sbrec_chassis_table));
> +    init_vxlan_mode(&nb->options, sbrec_chassis_table);
> +    char *max_tunid = xasprintf("%d", get_ovn_max_dp_key_local());
>      smap_replace(options, "max_tunid", max_tunid);
>      free(max_tunid);
>
> @@ -523,6 +523,11 @@ check_nb_options_out_of_sync(const struct
> nbrec_nb_global *nb,
>          return true;
>      }
>
> +    if (config_out_of_sync(&nb->options, &config_data->nb_options,
> +                           "disable_vxlan_mode", false)) {
> +        return true;
> +    }
> +
>      return false;
>  }
>
> diff --git a/northd/northd.c b/northd/northd.c
> index c568f6360..859b233e8 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -90,6 +90,10 @@ static bool use_ct_inv_match = true;
>   */
>  static bool default_acl_drop;
>
> +/* If this option is 'true' northd will use limited 24-bit space for
> datapath
> + * and ports tunnel key allocation (12 bits for each instead of default
> 16). */
> +static bool vxlan_mode;
> +
>  #define MAX_OVN_TAGS 4096
>
>
> @@ -875,24 +879,31 @@ join_datapaths(const struct
> nbrec_logical_switch_table *nbrec_ls_table,
>      }
>  }
>
> -static bool
> -is_vxlan_mode(const struct sbrec_chassis_table *sbrec_chassis_table)
> +void
> +init_vxlan_mode(const struct smap *nb_options,
> +                const struct sbrec_chassis_table *sbrec_chassis_table)
>  {
> +    if (smap_get_bool(nb_options, "disable_vxlan_mode", false)) {
> +        vxlan_mode = false;
> +        return;
> +    }
> +
>      const struct sbrec_chassis *chassis;
>      SBREC_CHASSIS_TABLE_FOR_EACH (chassis, sbrec_chassis_table) {
>          for (int i = 0; i < chassis->n_encaps; i++) {
>              if (!strcmp(chassis->encaps[i]->type, "vxlan")) {
> -                return true;
> +                vxlan_mode = true;
> +                return;
>              }
>          }
>      }
> -    return false;
> +    vxlan_mode = false;
>  }
>

(No action required) AFAIU most of the changes in this patch are due to
this change to is_vxlan_mode that is not strictly needed for the feature to
work.

It may make sense to read the option only once per Global_Config update /
en_northd_run execution; but this could be judged independently from the
new flag too. But I don't insist that this patch is split. Just note that
it will clash with this series:
https://patchwork.ozlabs.org/project/ovn/list/?series=404623 (whichever
goes in first).



>
>  uint32_t
> -get_ovn_max_dp_key_local(const struct sbrec_chassis_table
> *sbrec_chassis_table)
> +get_ovn_max_dp_key_local(void)
>  {
> -    if (is_vxlan_mode(sbrec_chassis_table)) {
> +    if (vxlan_mode) {
>          /* OVN_MAX_DP_GLOBAL_NUM doesn't apply for vxlan mode. */
>          return OVN_MAX_DP_VXLAN_KEY;
>      }
> @@ -900,15 +911,14 @@ get_ovn_max_dp_key_local(const struct
> sbrec_chassis_table *sbrec_chassis_table)
>  }
>
>  static void
> -ovn_datapath_allocate_key(const struct sbrec_chassis_table
> *sbrec_ch_table,
> -                          struct hmap *datapaths, struct hmap *dp_tnlids,
> +ovn_datapath_allocate_key(struct hmap *datapaths, struct hmap *dp_tnlids,
>                            struct ovn_datapath *od, uint32_t *hint)
>  {
>      if (!od->tunnel_key) {
>          od->tunnel_key = ovn_allocate_tnlid(dp_tnlids, "datapath",
> -                                    OVN_MIN_DP_KEY_LOCAL,
> -
> get_ovn_max_dp_key_local(sbrec_ch_table),
> -                                    hint);
> +                                            OVN_MIN_DP_KEY_LOCAL,
> +                                            get_ovn_max_dp_key_local(),
> +                                            hint);
>          if (!od->tunnel_key) {
>              if (od->sb) {
>                  sbrec_datapath_binding_delete(od->sb);
> @@ -921,7 +931,6 @@ ovn_datapath_allocate_key(const struct
> sbrec_chassis_table *sbrec_ch_table,
>
>  static void
>  ovn_datapath_assign_requested_tnl_id(
> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>      struct hmap *dp_tnlids, struct ovn_datapath *od)
>  {
>      const struct smap *other_config = (od->nbs
> @@ -930,8 +939,7 @@ ovn_datapath_assign_requested_tnl_id(
>      uint32_t tunnel_key = smap_get_int(other_config, "requested-tnl-key",
> 0);
>      if (tunnel_key) {
>          const char *interconn_ts = smap_get(other_config, "interconn-ts");
> -        if (!interconn_ts && is_vxlan_mode(sbrec_chassis_table) &&
> -            tunnel_key >= 1 << 12) {
> +        if (!interconn_ts && vxlan_mode && tunnel_key >= 1 << 12) {
>              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>              VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for datapath %s is "
>                           "incompatible with VXLAN", tunnel_key,
> @@ -979,7 +987,6 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>                  const struct nbrec_logical_switch_table *nbrec_ls_table,
>                  const struct nbrec_logical_router_table *nbrec_lr_table,
>                  const struct sbrec_datapath_binding_table *sbrec_dp_table,
> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>                  struct ovn_datapaths *ls_datapaths,
>                  struct ovn_datapaths *lr_datapaths,
>                  struct ovs_list *lr_list)
> @@ -994,12 +1001,11 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>      struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
>      struct ovn_datapath *od;
>      LIST_FOR_EACH (od, list, &both) {
> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table,
> &dp_tnlids,
> -                                             od);
> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>      }
>      LIST_FOR_EACH (od, list, &nb_only) {
> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table,
> &dp_tnlids,
> -                                             od); }
> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
> +    }
>
>      /* Keep nonconflicting tunnel IDs that are already assigned. */
>      LIST_FOR_EACH (od, list, &both) {
> @@ -1011,12 +1017,10 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>      /* Assign new tunnel ids where needed. */
>      uint32_t hint = 0;
>      LIST_FOR_EACH_SAFE (od, list, &both) {
> -        ovn_datapath_allocate_key(sbrec_chassis_table,
> -                                  datapaths, &dp_tnlids, od, &hint);
> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>      }
>      LIST_FOR_EACH_SAFE (od, list, &nb_only) {
> -        ovn_datapath_allocate_key(sbrec_chassis_table,
> -                                  datapaths, &dp_tnlids, od, &hint);
> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>      }
>
>      /* Sync tunnel ids from nb to sb. */
> @@ -3976,16 +3980,14 @@ ovn_port_add_tnlid(struct ovn_port *op, uint32_t
> tunnel_key)
>   * that the I-P engine can fallback to recompute if needed; otherwise
> return
>   * true (even if the key is not allocated). */
>  static bool
> -ovn_port_assign_requested_tnl_id(
> -    const struct sbrec_chassis_table *sbrec_chassis_table, struct
> ovn_port *op)
> +ovn_port_assign_requested_tnl_id(struct ovn_port *op)
>  {
>      const struct smap *options = (op->nbsp
>                                    ? &op->nbsp->options
>                                    : &op->nbrp->options);
>      uint32_t tunnel_key = smap_get_int(options, "requested-tnl-key", 0);
>      if (tunnel_key) {
> -        if (is_vxlan_mode(sbrec_chassis_table) &&
> -                tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
> +        if (vxlan_mode && tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>              VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for port %s "
>                           "is incompatible with VXLAN",
> @@ -4005,12 +4007,10 @@ ovn_port_assign_requested_tnl_id(
>  }
>
>  static bool
> -ovn_port_allocate_key(const struct sbrec_chassis_table
> *sbrec_chassis_table,
> -                      struct hmap *ports,
> -                      struct ovn_port *op)
> +ovn_port_allocate_key(struct hmap *ports, struct ovn_port *op)
>  {
>      if (!op->tunnel_key) {
> -        uint8_t key_bits = is_vxlan_mode(sbrec_chassis_table)? 12 : 16;
> +        uint8_t key_bits = vxlan_mode ? 12 : 16;
>          op->tunnel_key = ovn_allocate_tnlid(&op->od->port_tnlids, "port",
>                                              1, (1u << (key_bits - 1)) - 1,
>                                              &op->od->port_key_hint);
> @@ -4035,7 +4035,6 @@ ovn_port_allocate_key(const struct
> sbrec_chassis_table *sbrec_chassis_table,
>  static void
>  build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>      const struct sbrec_port_binding_table *sbrec_port_binding_table,
> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>      const struct sbrec_mirror_table *sbrec_mirror_table,
>      const struct sbrec_mac_binding_table *sbrec_mac_binding_table,
>      const struct sbrec_ha_chassis_group_table
> *sbrec_ha_chassis_group_table,
> @@ -4069,10 +4068,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>      /* Assign explicitly requested tunnel ids first. */
>      struct ovn_port *op;
>      LIST_FOR_EACH (op, list, &both) {
> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
> +        ovn_port_assign_requested_tnl_id(op);
>      }
>      LIST_FOR_EACH (op, list, &nb_only) {
> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
> +        ovn_port_assign_requested_tnl_id(op);
>      }
>
>      /* Keep nonconflicting tunnel IDs that are already assigned. */
> @@ -4084,10 +4083,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>
>      /* Assign new tunnel ids where needed. */
>      LIST_FOR_EACH_SAFE (op, list, &both) {
> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
> +        ovn_port_allocate_key(ports, op);
>      }
>      LIST_FOR_EACH_SAFE (op, list, &nb_only) {
> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
> +        ovn_port_allocate_key(ports, op);
>      }
>
>      /* For logical ports that are in both databases, update the southbound
> @@ -4294,14 +4293,13 @@ ls_port_init(struct ovn_port *op, struct
> ovsdb_idl_txn *ovnsb_txn,
>               struct hmap *ls_ports, struct ovn_datapath *od,
>               const struct sbrec_port_binding *sb,
>               const struct sbrec_mirror_table *sbrec_mirror_table,
> -             const struct sbrec_chassis_table *sbrec_chassis_table,
>               struct ovsdb_idl_index *sbrec_chassis_by_name,
>               struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>  {
>      op->od = od;
>      parse_lsp_addrs(op);
>      /* Assign explicitly requested tunnel ids first. */
> -    if (!ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op)) {
> +    if (!ovn_port_assign_requested_tnl_id(op)) {
>          return false;
>      }
>      if (sb) {
> @@ -4318,7 +4316,7 @@ ls_port_init(struct ovn_port *op, struct
> ovsdb_idl_txn *ovnsb_txn,
>          sbrec_port_binding_set_logical_port(op->sb, op->key);
>      }
>      /* Assign new tunnel ids where needed. */
> -    if (!ovn_port_allocate_key(sbrec_chassis_table, ls_ports, op)) {
> +    if (!ovn_port_allocate_key(ls_ports, op)) {
>          return false;
>      }
>      ovn_port_update_sbrec(ovnsb_txn, sbrec_chassis_by_name,
> @@ -4332,15 +4330,13 @@ ls_port_create(struct ovsdb_idl_txn *ovnsb_txn,
> struct hmap *ls_ports,
>                 const char *key, const struct nbrec_logical_switch_port
> *nbsp,
>                 struct ovn_datapath *od, const struct sbrec_port_binding
> *sb,
>                 const struct sbrec_mirror_table *sbrec_mirror_table,
> -               const struct sbrec_chassis_table *sbrec_chassis_table,
>                 struct ovsdb_idl_index *sbrec_chassis_by_name,
>                 struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>  {
>      struct ovn_port *op = ovn_port_create(ls_ports, key, nbsp, NULL,
>                                            NULL);
>      hmap_insert(&od->ports, &op->dp_node, hmap_node_hash(&op->key_node));
> -    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
> -                      sbrec_mirror_table, sbrec_chassis_table,
> +    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
>                        sbrec_chassis_by_name, sbrec_chassis_by_hostname)) {
>          ovn_port_destroy(ls_ports, op);
>          return NULL;
> @@ -4357,7 +4353,6 @@ ls_port_reinit(struct ovn_port *op, struct
> ovsdb_idl_txn *ovnsb_txn,
>                  struct ovn_datapath *od,
>                  const struct sbrec_port_binding *sb,
>                  const struct sbrec_mirror_table *sbrec_mirror_table,
> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>                  struct ovsdb_idl_index *sbrec_chassis_by_name,
>                  struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>  {
> @@ -4365,8 +4360,7 @@ ls_port_reinit(struct ovn_port *op, struct
> ovsdb_idl_txn *ovnsb_txn,
>      op->sb = sb;
>      ovn_port_set_nb(op, nbsp, nbrp);
>      op->l3dgw_port = op->cr_port = NULL;
> -    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
> -                        sbrec_mirror_table, sbrec_chassis_table,
> +    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
> sbrec_mirror_table,
>                          sbrec_chassis_by_name, sbrec_chassis_by_hostname);
>  }
>
> @@ -4511,7 +4505,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
>              op = ls_port_create(ovnsb_idl_txn, &nd->ls_ports,
>                                  new_nbsp->name, new_nbsp, od, NULL,
>                                  ni->sbrec_mirror_table,
> -                                ni->sbrec_chassis_table,
>                                  ni->sbrec_chassis_by_name,
>                                  ni->sbrec_chassis_by_hostname);
>              if (!op) {
> @@ -4543,7 +4536,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
>              if (!ls_port_reinit(op, ovnsb_idl_txn, &nd->ls_ports,
>                                  new_nbsp, NULL,
>                                  od, sb, ni->sbrec_mirror_table,
> -                                ni->sbrec_chassis_table,
>                                  ni->sbrec_chassis_by_name,
>                                  ni->sbrec_chassis_by_hostname)) {
>                  goto fail;
> @@ -17300,11 +17292,12 @@ ovnnb_db_run(struct northd_input *input_data,
>      use_common_zone = smap_get_bool(input_data->nb_options,
> "use_common_zone",
>                                      false);
>
> +    init_vxlan_mode(input_data->nb_options,
> input_data->sbrec_chassis_table);
> +
>      build_datapaths(ovnsb_txn,
>                      input_data->nbrec_logical_switch_table,
>                      input_data->nbrec_logical_router_table,
>                      input_data->sbrec_datapath_binding_table,
> -                    input_data->sbrec_chassis_table,
>                      &data->ls_datapaths,
>                      &data->lr_datapaths, &data->lr_list);
>      build_lb_datapaths(input_data->lbs, input_data->lbgrps,
> @@ -17312,7 +17305,6 @@ ovnnb_db_run(struct northd_input *input_data,
>                         &data->lb_datapaths_map,
> &data->lb_group_datapaths_map);
>      build_ports(ovnsb_txn,
>                  input_data->sbrec_port_binding_table,
> -                input_data->sbrec_chassis_table,
>                  input_data->sbrec_mirror_table,
>                  input_data->sbrec_mac_binding_table,
>                  input_data->sbrec_ha_chassis_group_table,
> diff --git a/northd/northd.h b/northd/northd.h
> index 5e9fa4745..ed9d992fb 100644
> --- a/northd/northd.h
> +++ b/northd/northd.h
> @@ -788,6 +788,10 @@ lr_has_multiple_gw_ports(const struct ovn_datapath
> *od)
>      return od->n_l3dgw_ports > 1 && !od->is_gw_router;
>  }
>
> -uint32_t get_ovn_max_dp_key_local(const struct sbrec_chassis_table *);
> +void
> +init_vxlan_mode(const struct smap *nb_options,
> +                const struct sbrec_chassis_table *sbrec_chassis_table);
> +
> +uint32_t get_ovn_max_dp_key_local(void);
>
>  #endif /* NORTHD_H */
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index b652046a7..9b2cb355e 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -381,6 +381,18 @@
>          of SB changes would be very noticeable.
>        </column>
>
> +      <column name="options" key="disable_vxlan_mode">
> +        Be default if at least one chassis in OVN cluster has VXLAN encap,
> +        northd will run in a `vxlan mode`, which means it splits 24-bit
> +        VXLAN VNI for datapath and port tunnel IDs allocation evenly.
> Maximum
> +        datapaths count in this mode is 4095 and maximum ports per
> datapath is
> +        2047.  Rest of IDs are used for multicast groups.
> +        In case VXLAN encaps are needed on chassis only to support HW VTEP
> +        functionality, and main encap type is GENEVE or STT, set this
> option to
> +        `false` to use defaults -- 16-bit space for datapath tunnel IDS
> and 15
>

-> tunnel IDs (but see below)



> +        bits for port tunnel IDs.
> +      </column>
> +
>

This text is explaining what is already captured in the `Tunnel
Encapsulations` section of ovn-architecture man page. It would be nice to
refer to the section in the other document, somehow, instead of explaining
the math twice. I'd suggest to keep the option documentation here to the
minimum and leave details in the architecture document.


>        <group title="Options for configuring interconnection route
> advertisement">
>          <p>
>            These options control how routes are advertised between OVN
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index fc2c972a4..6edb1129e 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -2819,6 +2819,35 @@ AT_CHECK(
>  get_tunnel_keys
>  AT_CHECK([test $lsp02 = 3 && test $ls1 = 123])
>
> +AT_CLEANUP
> +])
> +OVN_FOR_EACH_NORTHD_NO_HV([
> +AT_SETUP([check vxlan mode disabling])
> +ovn_start
> +
> +# Create a fake chassis with vxlan encap to implicitly enable vxlan mode.
> +ovn-sbctl \
> +    --id=@e create encap chassis_name=hv1 ip="192.168.0.1" type="vxlan" \
> +    -- --id=@c create chassis name=hv1 encaps=@e
> +
> +cmd="ovn-nbctl --wait=sb"
> +for i in {1..4097..1}; do
> +    cmd="${cmd} -- ls-add lsw-${i}"
> +done
> +
> +eval $cmd
> +
> +check_row_count nb:Logical_Switch 4097
> +wait_row_count sb:Datapath_Binding 4095
> +
> +OVS_WAIT_UNTIL([grep "all datapath tunnel ids exhausted"
> northd/ovn-northd.log])
> +
> +# Explicitly disable vxlan mode and check that two remaining datapaths
> were created.
> +check ovn-nbctl set NB_Global . options:disable_vxlan_mode=true
> +
> +check_row_count nb:Logical_Switch 4097
> +wait_row_count sb:Datapath_Binding 4097
> +
>  AT_CLEANUP
>  ])
>
> --
> 2.44.0
>
>
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to