Thank you Vladislav.

FYI it was reported in the past in
https://mail.openvswitch.org/pipermail/ovs-discuss/2022-July/051931.html
but fell through cracks then. Thanks for picking it up!

In your patch, you introduce a new config option to disable the
'vxlan-mode' behavior. This will definitely work. But I wonder if we can
automatically detect this scenario by ignoring the chassis that are VTEP
from consideration? I believe ovn-controller-vtep sets `is-vtep` in
other_config, so - would it work if we modify is_vxlan_mode to consider it
too?

Thanks again for looking into this.
Ihar

On Wed, Apr 3, 2024 at 6:34 AM Vladislav Odintsov <odiv...@gmail.com> wrote:

> Commit [1] introduced a "vxlan mode" concept.  It brought a limitation
> for available tunnel IDs because of lack of space in VXLAN VNI.
> In vxlan mode OVN is limited by 4095 datapaths (LRs or non-transit LSs)
> and 2047 logical switch ports per datapath.
>
> Prior to this patch vxlan mode was enabled automatically if at least one
> chassis had encap of vxlan type.  In scenarios where one want to use VXLAN
> only for HW VTEP (RAMP) switch, such limitation makes no sence.
>
> This patch adds support for explicit disabling of vxlan mode via
> Northbound database.
>
> 0: https://github.com/ovn-org/ovn/commit/b07f1bc3d068
>
> CC: Ihar Hrachyshka <ihrac...@redhat.com>
> Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath bindings")
> Signed-off-by: Vladislav Odintsov <odiv...@gmail.com>
> ---
>  northd/en-global-config.c |  9 +++-
>  northd/northd.c           | 90 ++++++++++++++++++---------------------
>  northd/northd.h           |  6 ++-
>  ovn-nb.xml                | 12 ++++++
>  tests/ovn-northd.at       | 29 +++++++++++++
>  5 files changed, 94 insertions(+), 52 deletions(-)
>
> diff --git a/northd/en-global-config.c b/northd/en-global-config.c
> index 34e393b33..9310c4575 100644
> --- a/northd/en-global-config.c
> +++ b/northd/en-global-config.c
> @@ -115,8 +115,8 @@ en_global_config_run(struct engine_node *node , void
> *data)
>                       config_data->svc_monitor_mac);
>      }
>
> -    char *max_tunid = xasprintf("%d",
> -        get_ovn_max_dp_key_local(sbrec_chassis_table));
> +    init_vxlan_mode(&nb->options, sbrec_chassis_table);
> +    char *max_tunid = xasprintf("%d", get_ovn_max_dp_key_local());
>      smap_replace(options, "max_tunid", max_tunid);
>      free(max_tunid);
>
> @@ -523,6 +523,11 @@ check_nb_options_out_of_sync(const struct
> nbrec_nb_global *nb,
>          return true;
>      }
>
> +    if (config_out_of_sync(&nb->options, &config_data->nb_options,
> +                           "disable_vxlan_mode", false)) {
> +        return true;
> +    }
> +
>      return false;
>  }
>
> diff --git a/northd/northd.c b/northd/northd.c
> index c568f6360..859b233e8 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -90,6 +90,10 @@ static bool use_ct_inv_match = true;
>   */
>  static bool default_acl_drop;
>
> +/* If this option is 'true' northd will use limited 24-bit space for
> datapath
> + * and ports tunnel key allocation (12 bits for each instead of default
> 16). */
> +static bool vxlan_mode;
> +
>  #define MAX_OVN_TAGS 4096
>
>
> @@ -875,24 +879,31 @@ join_datapaths(const struct
> nbrec_logical_switch_table *nbrec_ls_table,
>      }
>  }
>
> -static bool
> -is_vxlan_mode(const struct sbrec_chassis_table *sbrec_chassis_table)
> +void
> +init_vxlan_mode(const struct smap *nb_options,
> +                const struct sbrec_chassis_table *sbrec_chassis_table)
>  {
> +    if (smap_get_bool(nb_options, "disable_vxlan_mode", false)) {
> +        vxlan_mode = false;
> +        return;
> +    }
> +
>      const struct sbrec_chassis *chassis;
>      SBREC_CHASSIS_TABLE_FOR_EACH (chassis, sbrec_chassis_table) {
>          for (int i = 0; i < chassis->n_encaps; i++) {
>              if (!strcmp(chassis->encaps[i]->type, "vxlan")) {
> -                return true;
> +                vxlan_mode = true;
> +                return;
>              }
>          }
>      }
> -    return false;
> +    vxlan_mode = false;
>  }
>
>  uint32_t
> -get_ovn_max_dp_key_local(const struct sbrec_chassis_table
> *sbrec_chassis_table)
> +get_ovn_max_dp_key_local(void)
>  {
> -    if (is_vxlan_mode(sbrec_chassis_table)) {
> +    if (vxlan_mode) {
>          /* OVN_MAX_DP_GLOBAL_NUM doesn't apply for vxlan mode. */
>          return OVN_MAX_DP_VXLAN_KEY;
>      }
> @@ -900,15 +911,14 @@ get_ovn_max_dp_key_local(const struct
> sbrec_chassis_table *sbrec_chassis_table)
>  }
>
>  static void
> -ovn_datapath_allocate_key(const struct sbrec_chassis_table
> *sbrec_ch_table,
> -                          struct hmap *datapaths, struct hmap *dp_tnlids,
> +ovn_datapath_allocate_key(struct hmap *datapaths, struct hmap *dp_tnlids,
>                            struct ovn_datapath *od, uint32_t *hint)
>  {
>      if (!od->tunnel_key) {
>          od->tunnel_key = ovn_allocate_tnlid(dp_tnlids, "datapath",
> -                                    OVN_MIN_DP_KEY_LOCAL,
> -
> get_ovn_max_dp_key_local(sbrec_ch_table),
> -                                    hint);
> +                                            OVN_MIN_DP_KEY_LOCAL,
> +                                            get_ovn_max_dp_key_local(),
> +                                            hint);
>          if (!od->tunnel_key) {
>              if (od->sb) {
>                  sbrec_datapath_binding_delete(od->sb);
> @@ -921,7 +931,6 @@ ovn_datapath_allocate_key(const struct
> sbrec_chassis_table *sbrec_ch_table,
>
>  static void
>  ovn_datapath_assign_requested_tnl_id(
> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>      struct hmap *dp_tnlids, struct ovn_datapath *od)
>  {
>      const struct smap *other_config = (od->nbs
> @@ -930,8 +939,7 @@ ovn_datapath_assign_requested_tnl_id(
>      uint32_t tunnel_key = smap_get_int(other_config, "requested-tnl-key",
> 0);
>      if (tunnel_key) {
>          const char *interconn_ts = smap_get(other_config, "interconn-ts");
> -        if (!interconn_ts && is_vxlan_mode(sbrec_chassis_table) &&
> -            tunnel_key >= 1 << 12) {
> +        if (!interconn_ts && vxlan_mode && tunnel_key >= 1 << 12) {
>              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>              VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for datapath %s is "
>                           "incompatible with VXLAN", tunnel_key,
> @@ -979,7 +987,6 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>                  const struct nbrec_logical_switch_table *nbrec_ls_table,
>                  const struct nbrec_logical_router_table *nbrec_lr_table,
>                  const struct sbrec_datapath_binding_table *sbrec_dp_table,
> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>                  struct ovn_datapaths *ls_datapaths,
>                  struct ovn_datapaths *lr_datapaths,
>                  struct ovs_list *lr_list)
> @@ -994,12 +1001,11 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>      struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
>      struct ovn_datapath *od;
>      LIST_FOR_EACH (od, list, &both) {
> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table,
> &dp_tnlids,
> -                                             od);
> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
>      }
>      LIST_FOR_EACH (od, list, &nb_only) {
> -        ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table,
> &dp_tnlids,
> -                                             od); }
> +        ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od);
> +    }
>
>      /* Keep nonconflicting tunnel IDs that are already assigned. */
>      LIST_FOR_EACH (od, list, &both) {
> @@ -1011,12 +1017,10 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn,
>      /* Assign new tunnel ids where needed. */
>      uint32_t hint = 0;
>      LIST_FOR_EACH_SAFE (od, list, &both) {
> -        ovn_datapath_allocate_key(sbrec_chassis_table,
> -                                  datapaths, &dp_tnlids, od, &hint);
> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>      }
>      LIST_FOR_EACH_SAFE (od, list, &nb_only) {
> -        ovn_datapath_allocate_key(sbrec_chassis_table,
> -                                  datapaths, &dp_tnlids, od, &hint);
> +        ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint);
>      }
>
>      /* Sync tunnel ids from nb to sb. */
> @@ -3976,16 +3980,14 @@ ovn_port_add_tnlid(struct ovn_port *op, uint32_t
> tunnel_key)
>   * that the I-P engine can fallback to recompute if needed; otherwise
> return
>   * true (even if the key is not allocated). */
>  static bool
> -ovn_port_assign_requested_tnl_id(
> -    const struct sbrec_chassis_table *sbrec_chassis_table, struct
> ovn_port *op)
> +ovn_port_assign_requested_tnl_id(struct ovn_port *op)
>  {
>      const struct smap *options = (op->nbsp
>                                    ? &op->nbsp->options
>                                    : &op->nbrp->options);
>      uint32_t tunnel_key = smap_get_int(options, "requested-tnl-key", 0);
>      if (tunnel_key) {
> -        if (is_vxlan_mode(sbrec_chassis_table) &&
> -                tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
> +        if (vxlan_mode && tunnel_key >= OVN_VXLAN_MIN_MULTICAST) {
>              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>              VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for port %s "
>                           "is incompatible with VXLAN",
> @@ -4005,12 +4007,10 @@ ovn_port_assign_requested_tnl_id(
>  }
>
>  static bool
> -ovn_port_allocate_key(const struct sbrec_chassis_table
> *sbrec_chassis_table,
> -                      struct hmap *ports,
> -                      struct ovn_port *op)
> +ovn_port_allocate_key(struct hmap *ports, struct ovn_port *op)
>  {
>      if (!op->tunnel_key) {
> -        uint8_t key_bits = is_vxlan_mode(sbrec_chassis_table)? 12 : 16;
> +        uint8_t key_bits = vxlan_mode ? 12 : 16;
>          op->tunnel_key = ovn_allocate_tnlid(&op->od->port_tnlids, "port",
>                                              1, (1u << (key_bits - 1)) - 1,
>                                              &op->od->port_key_hint);
> @@ -4035,7 +4035,6 @@ ovn_port_allocate_key(const struct
> sbrec_chassis_table *sbrec_chassis_table,
>  static void
>  build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>      const struct sbrec_port_binding_table *sbrec_port_binding_table,
> -    const struct sbrec_chassis_table *sbrec_chassis_table,
>      const struct sbrec_mirror_table *sbrec_mirror_table,
>      const struct sbrec_mac_binding_table *sbrec_mac_binding_table,
>      const struct sbrec_ha_chassis_group_table
> *sbrec_ha_chassis_group_table,
> @@ -4069,10 +4068,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>      /* Assign explicitly requested tunnel ids first. */
>      struct ovn_port *op;
>      LIST_FOR_EACH (op, list, &both) {
> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
> +        ovn_port_assign_requested_tnl_id(op);
>      }
>      LIST_FOR_EACH (op, list, &nb_only) {
> -        ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op);
> +        ovn_port_assign_requested_tnl_id(op);
>      }
>
>      /* Keep nonconflicting tunnel IDs that are already assigned. */
> @@ -4084,10 +4083,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn,
>
>      /* Assign new tunnel ids where needed. */
>      LIST_FOR_EACH_SAFE (op, list, &both) {
> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
> +        ovn_port_allocate_key(ports, op);
>      }
>      LIST_FOR_EACH_SAFE (op, list, &nb_only) {
> -        ovn_port_allocate_key(sbrec_chassis_table, ports, op);
> +        ovn_port_allocate_key(ports, op);
>      }
>
>      /* For logical ports that are in both databases, update the southbound
> @@ -4294,14 +4293,13 @@ ls_port_init(struct ovn_port *op, struct
> ovsdb_idl_txn *ovnsb_txn,
>               struct hmap *ls_ports, struct ovn_datapath *od,
>               const struct sbrec_port_binding *sb,
>               const struct sbrec_mirror_table *sbrec_mirror_table,
> -             const struct sbrec_chassis_table *sbrec_chassis_table,
>               struct ovsdb_idl_index *sbrec_chassis_by_name,
>               struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>  {
>      op->od = od;
>      parse_lsp_addrs(op);
>      /* Assign explicitly requested tunnel ids first. */
> -    if (!ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op)) {
> +    if (!ovn_port_assign_requested_tnl_id(op)) {
>          return false;
>      }
>      if (sb) {
> @@ -4318,7 +4316,7 @@ ls_port_init(struct ovn_port *op, struct
> ovsdb_idl_txn *ovnsb_txn,
>          sbrec_port_binding_set_logical_port(op->sb, op->key);
>      }
>      /* Assign new tunnel ids where needed. */
> -    if (!ovn_port_allocate_key(sbrec_chassis_table, ls_ports, op)) {
> +    if (!ovn_port_allocate_key(ls_ports, op)) {
>          return false;
>      }
>      ovn_port_update_sbrec(ovnsb_txn, sbrec_chassis_by_name,
> @@ -4332,15 +4330,13 @@ ls_port_create(struct ovsdb_idl_txn *ovnsb_txn,
> struct hmap *ls_ports,
>                 const char *key, const struct nbrec_logical_switch_port
> *nbsp,
>                 struct ovn_datapath *od, const struct sbrec_port_binding
> *sb,
>                 const struct sbrec_mirror_table *sbrec_mirror_table,
> -               const struct sbrec_chassis_table *sbrec_chassis_table,
>                 struct ovsdb_idl_index *sbrec_chassis_by_name,
>                 struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>  {
>      struct ovn_port *op = ovn_port_create(ls_ports, key, nbsp, NULL,
>                                            NULL);
>      hmap_insert(&od->ports, &op->dp_node, hmap_node_hash(&op->key_node));
> -    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
> -                      sbrec_mirror_table, sbrec_chassis_table,
> +    if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table,
>                        sbrec_chassis_by_name, sbrec_chassis_by_hostname)) {
>          ovn_port_destroy(ls_ports, op);
>          return NULL;
> @@ -4357,7 +4353,6 @@ ls_port_reinit(struct ovn_port *op, struct
> ovsdb_idl_txn *ovnsb_txn,
>                  struct ovn_datapath *od,
>                  const struct sbrec_port_binding *sb,
>                  const struct sbrec_mirror_table *sbrec_mirror_table,
> -                const struct sbrec_chassis_table *sbrec_chassis_table,
>                  struct ovsdb_idl_index *sbrec_chassis_by_name,
>                  struct ovsdb_idl_index *sbrec_chassis_by_hostname)
>  {
> @@ -4365,8 +4360,7 @@ ls_port_reinit(struct ovn_port *op, struct
> ovsdb_idl_txn *ovnsb_txn,
>      op->sb = sb;
>      ovn_port_set_nb(op, nbsp, nbrp);
>      op->l3dgw_port = op->cr_port = NULL;
> -    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
> -                        sbrec_mirror_table, sbrec_chassis_table,
> +    return ls_port_init(op, ovnsb_txn, ls_ports, od, sb,
> sbrec_mirror_table,
>                          sbrec_chassis_by_name, sbrec_chassis_by_hostname);
>  }
>
> @@ -4511,7 +4505,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
>              op = ls_port_create(ovnsb_idl_txn, &nd->ls_ports,
>                                  new_nbsp->name, new_nbsp, od, NULL,
>                                  ni->sbrec_mirror_table,
> -                                ni->sbrec_chassis_table,
>                                  ni->sbrec_chassis_by_name,
>                                  ni->sbrec_chassis_by_hostname);
>              if (!op) {
> @@ -4543,7 +4536,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn
> *ovnsb_idl_txn,
>              if (!ls_port_reinit(op, ovnsb_idl_txn, &nd->ls_ports,
>                                  new_nbsp, NULL,
>                                  od, sb, ni->sbrec_mirror_table,
> -                                ni->sbrec_chassis_table,
>                                  ni->sbrec_chassis_by_name,
>                                  ni->sbrec_chassis_by_hostname)) {
>                  goto fail;
> @@ -17300,11 +17292,12 @@ ovnnb_db_run(struct northd_input *input_data,
>      use_common_zone = smap_get_bool(input_data->nb_options,
> "use_common_zone",
>                                      false);
>
> +    init_vxlan_mode(input_data->nb_options,
> input_data->sbrec_chassis_table);
> +
>      build_datapaths(ovnsb_txn,
>                      input_data->nbrec_logical_switch_table,
>                      input_data->nbrec_logical_router_table,
>                      input_data->sbrec_datapath_binding_table,
> -                    input_data->sbrec_chassis_table,
>                      &data->ls_datapaths,
>                      &data->lr_datapaths, &data->lr_list);
>      build_lb_datapaths(input_data->lbs, input_data->lbgrps,
> @@ -17312,7 +17305,6 @@ ovnnb_db_run(struct northd_input *input_data,
>                         &data->lb_datapaths_map,
> &data->lb_group_datapaths_map);
>      build_ports(ovnsb_txn,
>                  input_data->sbrec_port_binding_table,
> -                input_data->sbrec_chassis_table,
>                  input_data->sbrec_mirror_table,
>                  input_data->sbrec_mac_binding_table,
>                  input_data->sbrec_ha_chassis_group_table,
> diff --git a/northd/northd.h b/northd/northd.h
> index 5e9fa4745..ed9d992fb 100644
> --- a/northd/northd.h
> +++ b/northd/northd.h
> @@ -788,6 +788,10 @@ lr_has_multiple_gw_ports(const struct ovn_datapath
> *od)
>      return od->n_l3dgw_ports > 1 && !od->is_gw_router;
>  }
>
> -uint32_t get_ovn_max_dp_key_local(const struct sbrec_chassis_table *);
> +void
> +init_vxlan_mode(const struct smap *nb_options,
> +                const struct sbrec_chassis_table *sbrec_chassis_table);
> +
> +uint32_t get_ovn_max_dp_key_local(void);
>
>  #endif /* NORTHD_H */
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index b652046a7..9b2cb355e 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -381,6 +381,18 @@
>          of SB changes would be very noticeable.
>        </column>
>
> +      <column name="options" key="disable_vxlan_mode">
> +        Be default if at least one chassis in OVN cluster has VXLAN encap,
> +        northd will run in a `vxlan mode`, which means it splits 24-bit
> +        VXLAN VNI for datapath and port tunnel IDs allocation evenly.
> Maximum
> +        datapaths count in this mode is 4095 and maximum ports per
> datapath is
> +        2047.  Rest of IDs are used for multicast groups.
> +        In case VXLAN encaps are needed on chassis only to support HW VTEP
> +        functionality, and main encap type is GENEVE or STT, set this
> option to
> +        `false` to use defaults -- 16-bit space for datapath tunnel IDS
> and 15
> +        bits for port tunnel IDs.
> +      </column>
> +
>        <group title="Options for configuring interconnection route
> advertisement">
>          <p>
>            These options control how routes are advertised between OVN
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index cd53755b2..f5d025852 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -2819,6 +2819,35 @@ AT_CHECK(
>  get_tunnel_keys
>  AT_CHECK([test $lsp02 = 3 && test $ls1 = 123])
>
> +AT_CLEANUP
> +])
> +OVN_FOR_EACH_NORTHD_NO_HV([
> +AT_SETUP([check vxlan mode disabling])
> +ovn_start
> +
> +# Create a fake chassis with vxlan encap to implicitly enable vxlan mode.
> +ovn-sbctl \
> +    --id=@e create encap chassis_name=hv1 ip="192.168.0.1" type="vxlan" \
> +    -- --id=@c create chassis name=hv1 encaps=@e
> +
> +cmd="ovn-nbctl --wait=sb"
> +for i in {1..4097..1}; do
> +    cmd="${cmd} -- ls-add lsw-${i}"
> +done
> +
> +eval $cmd
> +
> +check_row_count nb:Logical_Switch 4097
> +wait_row_count sb:Datapath_Binding 4095
> +
> +OVS_WAIT_UNTIL([grep "all datapath tunnel ids exhausted"
> northd/ovn-northd.log])
> +
> +# Explicitly disable vxlan mode and check that two remaining datapaths
> were created.
> +check ovn-nbctl set NB_Global . options:disable_vxlan_mode=true
> +
> +check_row_count nb:Logical_Switch 4097
> +wait_row_count sb:Datapath_Binding 4097
> +
>  AT_CLEANUP
>  ])
>
> --
> 2.44.0
>
>
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to