Thank you, Vladislav. FYI, this was reported in the past in https://mail.openvswitch.org/pipermail/ovs-discuss/2022-July/051931.html but fell through the cracks then. Thanks for picking it up!
In your patch, you introduce a new config option to disable the 'vxlan-mode' behavior. This will definitely work. But I wonder if we can automatically detect this scenario by excluding chassis that are VTEPs from consideration? I believe ovn-controller-vtep sets `is-vtep` in other_config, so - would it work if we modify is_vxlan_mode to consider it too? Thanks again for looking into this. Ihar On Wed, Apr 3, 2024 at 6:34 AM Vladislav Odintsov <odiv...@gmail.com> wrote: > Commit [1] introduced a "vxlan mode" concept. It brought a limitation > for available tunnel IDs because of lack of space in VXLAN VNI. > In vxlan mode OVN is limited to 4095 datapaths (LRs or non-transit LSs) > and 2047 logical switch ports per datapath. > > Prior to this patch vxlan mode was enabled automatically if at least one > chassis had encap of vxlan type. In scenarios where one wants to use VXLAN > only for HW VTEP (RAMP) switch, such limitation makes no sense. > > This patch adds support for explicit disabling of vxlan mode via > Northbound database. 
> > 0: https://github.com/ovn-org/ovn/commit/b07f1bc3d068 > > CC: Ihar Hrachyshka <ihrac...@redhat.com> > Fixes: b07f1bc3d068 ("Add VXLAN support for non-VTEP datapath bindings") > Signed-off-by: Vladislav Odintsov <odiv...@gmail.com> > --- > northd/en-global-config.c | 9 +++- > northd/northd.c | 90 ++++++++++++++++++--------------------- > northd/northd.h | 6 ++- > ovn-nb.xml | 12 ++++++ > tests/ovn-northd.at | 29 +++++++++++++ > 5 files changed, 94 insertions(+), 52 deletions(-) > > diff --git a/northd/en-global-config.c b/northd/en-global-config.c > index 34e393b33..9310c4575 100644 > --- a/northd/en-global-config.c > +++ b/northd/en-global-config.c > @@ -115,8 +115,8 @@ en_global_config_run(struct engine_node *node , void > *data) > config_data->svc_monitor_mac); > } > > - char *max_tunid = xasprintf("%d", > - get_ovn_max_dp_key_local(sbrec_chassis_table)); > + init_vxlan_mode(&nb->options, sbrec_chassis_table); > + char *max_tunid = xasprintf("%d", get_ovn_max_dp_key_local()); > smap_replace(options, "max_tunid", max_tunid); > free(max_tunid); > > @@ -523,6 +523,11 @@ check_nb_options_out_of_sync(const struct > nbrec_nb_global *nb, > return true; > } > > + if (config_out_of_sync(&nb->options, &config_data->nb_options, > + "disable_vxlan_mode", false)) { > + return true; > + } > + > return false; > } > > diff --git a/northd/northd.c b/northd/northd.c > index c568f6360..859b233e8 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -90,6 +90,10 @@ static bool use_ct_inv_match = true; > */ > static bool default_acl_drop; > > +/* If this option is 'true' northd will use limited 24-bit space for > datapath > + * and ports tunnel key allocation (12 bits for each instead of default > 16). 
*/ > +static bool vxlan_mode; > + > #define MAX_OVN_TAGS 4096 > > > @@ -875,24 +879,31 @@ join_datapaths(const struct > nbrec_logical_switch_table *nbrec_ls_table, > } > } > > -static bool > -is_vxlan_mode(const struct sbrec_chassis_table *sbrec_chassis_table) > +void > +init_vxlan_mode(const struct smap *nb_options, > + const struct sbrec_chassis_table *sbrec_chassis_table) > { > + if (smap_get_bool(nb_options, "disable_vxlan_mode", false)) { > + vxlan_mode = false; > + return; > + } > + > const struct sbrec_chassis *chassis; > SBREC_CHASSIS_TABLE_FOR_EACH (chassis, sbrec_chassis_table) { > for (int i = 0; i < chassis->n_encaps; i++) { > if (!strcmp(chassis->encaps[i]->type, "vxlan")) { > - return true; > + vxlan_mode = true; > + return; > } > } > } > - return false; > + vxlan_mode = false; > } > > uint32_t > -get_ovn_max_dp_key_local(const struct sbrec_chassis_table > *sbrec_chassis_table) > +get_ovn_max_dp_key_local(void) > { > - if (is_vxlan_mode(sbrec_chassis_table)) { > + if (vxlan_mode) { > /* OVN_MAX_DP_GLOBAL_NUM doesn't apply for vxlan mode. 
*/ > return OVN_MAX_DP_VXLAN_KEY; > } > @@ -900,15 +911,14 @@ get_ovn_max_dp_key_local(const struct > sbrec_chassis_table *sbrec_chassis_table) > } > > static void > -ovn_datapath_allocate_key(const struct sbrec_chassis_table > *sbrec_ch_table, > - struct hmap *datapaths, struct hmap *dp_tnlids, > +ovn_datapath_allocate_key(struct hmap *datapaths, struct hmap *dp_tnlids, > struct ovn_datapath *od, uint32_t *hint) > { > if (!od->tunnel_key) { > od->tunnel_key = ovn_allocate_tnlid(dp_tnlids, "datapath", > - OVN_MIN_DP_KEY_LOCAL, > - > get_ovn_max_dp_key_local(sbrec_ch_table), > - hint); > + OVN_MIN_DP_KEY_LOCAL, > + get_ovn_max_dp_key_local(), > + hint); > if (!od->tunnel_key) { > if (od->sb) { > sbrec_datapath_binding_delete(od->sb); > @@ -921,7 +931,6 @@ ovn_datapath_allocate_key(const struct > sbrec_chassis_table *sbrec_ch_table, > > static void > ovn_datapath_assign_requested_tnl_id( > - const struct sbrec_chassis_table *sbrec_chassis_table, > struct hmap *dp_tnlids, struct ovn_datapath *od) > { > const struct smap *other_config = (od->nbs > @@ -930,8 +939,7 @@ ovn_datapath_assign_requested_tnl_id( > uint32_t tunnel_key = smap_get_int(other_config, "requested-tnl-key", > 0); > if (tunnel_key) { > const char *interconn_ts = smap_get(other_config, "interconn-ts"); > - if (!interconn_ts && is_vxlan_mode(sbrec_chassis_table) && > - tunnel_key >= 1 << 12) { > + if (!interconn_ts && vxlan_mode && tunnel_key >= 1 << 12) { > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); > VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for datapath %s is " > "incompatible with VXLAN", tunnel_key, > @@ -979,7 +987,6 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn, > const struct nbrec_logical_switch_table *nbrec_ls_table, > const struct nbrec_logical_router_table *nbrec_lr_table, > const struct sbrec_datapath_binding_table *sbrec_dp_table, > - const struct sbrec_chassis_table *sbrec_chassis_table, > struct ovn_datapaths *ls_datapaths, > struct ovn_datapaths *lr_datapaths, > 
struct ovs_list *lr_list) > @@ -994,12 +1001,11 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn, > struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids); > struct ovn_datapath *od; > LIST_FOR_EACH (od, list, &both) { > - ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, > &dp_tnlids, > - od); > + ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od); > } > LIST_FOR_EACH (od, list, &nb_only) { > - ovn_datapath_assign_requested_tnl_id(sbrec_chassis_table, > &dp_tnlids, > - od); } > + ovn_datapath_assign_requested_tnl_id(&dp_tnlids, od); > + } > > /* Keep nonconflicting tunnel IDs that are already assigned. */ > LIST_FOR_EACH (od, list, &both) { > @@ -1011,12 +1017,10 @@ build_datapaths(struct ovsdb_idl_txn *ovnsb_txn, > /* Assign new tunnel ids where needed. */ > uint32_t hint = 0; > LIST_FOR_EACH_SAFE (od, list, &both) { > - ovn_datapath_allocate_key(sbrec_chassis_table, > - datapaths, &dp_tnlids, od, &hint); > + ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint); > } > LIST_FOR_EACH_SAFE (od, list, &nb_only) { > - ovn_datapath_allocate_key(sbrec_chassis_table, > - datapaths, &dp_tnlids, od, &hint); > + ovn_datapath_allocate_key(datapaths, &dp_tnlids, od, &hint); > } > > /* Sync tunnel ids from nb to sb. */ > @@ -3976,16 +3980,14 @@ ovn_port_add_tnlid(struct ovn_port *op, uint32_t > tunnel_key) > * that the I-P engine can fallback to recompute if needed; otherwise > return > * true (even if the key is not allocated). */ > static bool > -ovn_port_assign_requested_tnl_id( > - const struct sbrec_chassis_table *sbrec_chassis_table, struct > ovn_port *op) > +ovn_port_assign_requested_tnl_id(struct ovn_port *op) > { > const struct smap *options = (op->nbsp > ? 
&op->nbsp->options > : &op->nbrp->options); > uint32_t tunnel_key = smap_get_int(options, "requested-tnl-key", 0); > if (tunnel_key) { > - if (is_vxlan_mode(sbrec_chassis_table) && > - tunnel_key >= OVN_VXLAN_MIN_MULTICAST) { > + if (vxlan_mode && tunnel_key >= OVN_VXLAN_MIN_MULTICAST) { > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); > VLOG_WARN_RL(&rl, "Tunnel key %"PRIu32" for port %s " > "is incompatible with VXLAN", > @@ -4005,12 +4007,10 @@ ovn_port_assign_requested_tnl_id( > } > > static bool > -ovn_port_allocate_key(const struct sbrec_chassis_table > *sbrec_chassis_table, > - struct hmap *ports, > - struct ovn_port *op) > +ovn_port_allocate_key(struct hmap *ports, struct ovn_port *op) > { > if (!op->tunnel_key) { > - uint8_t key_bits = is_vxlan_mode(sbrec_chassis_table)? 12 : 16; > + uint8_t key_bits = vxlan_mode ? 12 : 16; > op->tunnel_key = ovn_allocate_tnlid(&op->od->port_tnlids, "port", > 1, (1u << (key_bits - 1)) - 1, > &op->od->port_key_hint); > @@ -4035,7 +4035,6 @@ ovn_port_allocate_key(const struct > sbrec_chassis_table *sbrec_chassis_table, > static void > build_ports(struct ovsdb_idl_txn *ovnsb_txn, > const struct sbrec_port_binding_table *sbrec_port_binding_table, > - const struct sbrec_chassis_table *sbrec_chassis_table, > const struct sbrec_mirror_table *sbrec_mirror_table, > const struct sbrec_mac_binding_table *sbrec_mac_binding_table, > const struct sbrec_ha_chassis_group_table > *sbrec_ha_chassis_group_table, > @@ -4069,10 +4068,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn, > /* Assign explicitly requested tunnel ids first. */ > struct ovn_port *op; > LIST_FOR_EACH (op, list, &both) { > - ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op); > + ovn_port_assign_requested_tnl_id(op); > } > LIST_FOR_EACH (op, list, &nb_only) { > - ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op); > + ovn_port_assign_requested_tnl_id(op); > } > > /* Keep nonconflicting tunnel IDs that are already assigned. 
*/ > @@ -4084,10 +4083,10 @@ build_ports(struct ovsdb_idl_txn *ovnsb_txn, > > /* Assign new tunnel ids where needed. */ > LIST_FOR_EACH_SAFE (op, list, &both) { > - ovn_port_allocate_key(sbrec_chassis_table, ports, op); > + ovn_port_allocate_key(ports, op); > } > LIST_FOR_EACH_SAFE (op, list, &nb_only) { > - ovn_port_allocate_key(sbrec_chassis_table, ports, op); > + ovn_port_allocate_key(ports, op); > } > > /* For logical ports that are in both databases, update the southbound > @@ -4294,14 +4293,13 @@ ls_port_init(struct ovn_port *op, struct > ovsdb_idl_txn *ovnsb_txn, > struct hmap *ls_ports, struct ovn_datapath *od, > const struct sbrec_port_binding *sb, > const struct sbrec_mirror_table *sbrec_mirror_table, > - const struct sbrec_chassis_table *sbrec_chassis_table, > struct ovsdb_idl_index *sbrec_chassis_by_name, > struct ovsdb_idl_index *sbrec_chassis_by_hostname) > { > op->od = od; > parse_lsp_addrs(op); > /* Assign explicitly requested tunnel ids first. */ > - if (!ovn_port_assign_requested_tnl_id(sbrec_chassis_table, op)) { > + if (!ovn_port_assign_requested_tnl_id(op)) { > return false; > } > if (sb) { > @@ -4318,7 +4316,7 @@ ls_port_init(struct ovn_port *op, struct > ovsdb_idl_txn *ovnsb_txn, > sbrec_port_binding_set_logical_port(op->sb, op->key); > } > /* Assign new tunnel ids where needed. 
*/ > - if (!ovn_port_allocate_key(sbrec_chassis_table, ls_ports, op)) { > + if (!ovn_port_allocate_key(ls_ports, op)) { > return false; > } > ovn_port_update_sbrec(ovnsb_txn, sbrec_chassis_by_name, > @@ -4332,15 +4330,13 @@ ls_port_create(struct ovsdb_idl_txn *ovnsb_txn, > struct hmap *ls_ports, > const char *key, const struct nbrec_logical_switch_port > *nbsp, > struct ovn_datapath *od, const struct sbrec_port_binding > *sb, > const struct sbrec_mirror_table *sbrec_mirror_table, > - const struct sbrec_chassis_table *sbrec_chassis_table, > struct ovsdb_idl_index *sbrec_chassis_by_name, > struct ovsdb_idl_index *sbrec_chassis_by_hostname) > { > struct ovn_port *op = ovn_port_create(ls_ports, key, nbsp, NULL, > NULL); > hmap_insert(&od->ports, &op->dp_node, hmap_node_hash(&op->key_node)); > - if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb, > - sbrec_mirror_table, sbrec_chassis_table, > + if (!ls_port_init(op, ovnsb_txn, ls_ports, od, sb, sbrec_mirror_table, > sbrec_chassis_by_name, sbrec_chassis_by_hostname)) { > ovn_port_destroy(ls_ports, op); > return NULL; > @@ -4357,7 +4353,6 @@ ls_port_reinit(struct ovn_port *op, struct > ovsdb_idl_txn *ovnsb_txn, > struct ovn_datapath *od, > const struct sbrec_port_binding *sb, > const struct sbrec_mirror_table *sbrec_mirror_table, > - const struct sbrec_chassis_table *sbrec_chassis_table, > struct ovsdb_idl_index *sbrec_chassis_by_name, > struct ovsdb_idl_index *sbrec_chassis_by_hostname) > { > @@ -4365,8 +4360,7 @@ ls_port_reinit(struct ovn_port *op, struct > ovsdb_idl_txn *ovnsb_txn, > op->sb = sb; > ovn_port_set_nb(op, nbsp, nbrp); > op->l3dgw_port = op->cr_port = NULL; > - return ls_port_init(op, ovnsb_txn, ls_ports, od, sb, > - sbrec_mirror_table, sbrec_chassis_table, > + return ls_port_init(op, ovnsb_txn, ls_ports, od, sb, > sbrec_mirror_table, > sbrec_chassis_by_name, sbrec_chassis_by_hostname); > } > > @@ -4511,7 +4505,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn > *ovnsb_idl_txn, > op = 
ls_port_create(ovnsb_idl_txn, &nd->ls_ports, > new_nbsp->name, new_nbsp, od, NULL, > ni->sbrec_mirror_table, > - ni->sbrec_chassis_table, > ni->sbrec_chassis_by_name, > ni->sbrec_chassis_by_hostname); > if (!op) { > @@ -4543,7 +4536,6 @@ ls_handle_lsp_changes(struct ovsdb_idl_txn > *ovnsb_idl_txn, > if (!ls_port_reinit(op, ovnsb_idl_txn, &nd->ls_ports, > new_nbsp, NULL, > od, sb, ni->sbrec_mirror_table, > - ni->sbrec_chassis_table, > ni->sbrec_chassis_by_name, > ni->sbrec_chassis_by_hostname)) { > goto fail; > @@ -17300,11 +17292,12 @@ ovnnb_db_run(struct northd_input *input_data, > use_common_zone = smap_get_bool(input_data->nb_options, > "use_common_zone", > false); > > + init_vxlan_mode(input_data->nb_options, > input_data->sbrec_chassis_table); > + > build_datapaths(ovnsb_txn, > input_data->nbrec_logical_switch_table, > input_data->nbrec_logical_router_table, > input_data->sbrec_datapath_binding_table, > - input_data->sbrec_chassis_table, > &data->ls_datapaths, > &data->lr_datapaths, &data->lr_list); > build_lb_datapaths(input_data->lbs, input_data->lbgrps, > @@ -17312,7 +17305,6 @@ ovnnb_db_run(struct northd_input *input_data, > &data->lb_datapaths_map, > &data->lb_group_datapaths_map); > build_ports(ovnsb_txn, > input_data->sbrec_port_binding_table, > - input_data->sbrec_chassis_table, > input_data->sbrec_mirror_table, > input_data->sbrec_mac_binding_table, > input_data->sbrec_ha_chassis_group_table, > diff --git a/northd/northd.h b/northd/northd.h > index 5e9fa4745..ed9d992fb 100644 > --- a/northd/northd.h > +++ b/northd/northd.h > @@ -788,6 +788,10 @@ lr_has_multiple_gw_ports(const struct ovn_datapath > *od) > return od->n_l3dgw_ports > 1 && !od->is_gw_router; > } > > -uint32_t get_ovn_max_dp_key_local(const struct sbrec_chassis_table *); > +void > +init_vxlan_mode(const struct smap *nb_options, > + const struct sbrec_chassis_table *sbrec_chassis_table); > + > +uint32_t get_ovn_max_dp_key_local(void); > > #endif /* NORTHD_H */ > diff --git a/ovn-nb.xml 
b/ovn-nb.xml > index b652046a7..9b2cb355e 100644 > --- a/ovn-nb.xml > +++ b/ovn-nb.xml > @@ -381,6 +381,18 @@ > of SB changes would be very noticeable. > </column> > > + <column name="options" key="disable_vxlan_mode"> > + By default if at least one chassis in OVN cluster has VXLAN encap, > + northd will run in a `vxlan mode`, which means it splits 24-bit > + VXLAN VNI for datapath and port tunnel IDs allocation evenly. > Maximum > + datapaths count in this mode is 4095 and maximum ports per > datapath is > + 2047. Rest of IDs are used for multicast groups. > + In case VXLAN encaps are needed on chassis only to support HW VTEP > + functionality, and main encap type is GENEVE or STT, set this > option to > + `true` to use defaults -- 16-bit space for datapath tunnel IDs > and 15 > + bits for port tunnel IDs. > + </column> > + > <group title="Options for configuring interconnection route > advertisement"> > <p> > These options control how routes are advertised between OVN > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index cd53755b2..f5d025852 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -2819,6 +2819,35 @@ AT_CHECK( > get_tunnel_keys > AT_CHECK([test $lsp02 = 3 && test $ls1 = 123]) > > +AT_CLEANUP > +]) > +OVN_FOR_EACH_NORTHD_NO_HV([ > +AT_SETUP([check vxlan mode disabling]) > +ovn_start > + > +# Create a fake chassis with vxlan encap to implicitly enable vxlan mode. > +ovn-sbctl \ > + --id=@e create encap chassis_name=hv1 ip="192.168.0.1" type="vxlan" \ > + -- --id=@c create chassis name=hv1 encaps=@e > + > +cmd="ovn-nbctl --wait=sb" > +for i in {1..4097..1}; do > + cmd="${cmd} -- ls-add lsw-${i}" > +done > + > +eval $cmd > + > +check_row_count nb:Logical_Switch 4097 > +wait_row_count sb:Datapath_Binding 4095 > + > +OVS_WAIT_UNTIL([grep "all datapath tunnel ids exhausted" > northd/ovn-northd.log]) > + > +# Explicitly disable vxlan mode and check that two remaining datapaths > were created. > +check ovn-nbctl set NB_Global . 
options:disable_vxlan_mode=true > + > +check_row_count nb:Logical_Switch 4097 > +wait_row_count sb:Datapath_Binding 4097 > + > AT_CLEANUP > ]) > > -- > 2.44.0 > > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev