Hi Alexandra and Vladislav, Thanks for the patch. Please note that this is just a first review pass. I think you will need to rebase it on top of the OVN main branch.
> 1) Added a new option "distributed" for load balancers. > With this feature, balancers will work distributedly across compute nodes, > balancing to only local backends. is this option working even when the LB is applied to LS? If not, should we add any sanity check? > > 2) If the balancer is running on a router with dgp, the router will no longer > be centralized. > > 3) For such balancers, it is necessary to specify ip_port_mapping for the > correct formation of logical flows. > > 4) Balancing occurs through a new action 'ct_lb_mark_local'. > > Example: > Load Balancer: lb1 with VIP 10.255.0.1 and distributed option enabled. > Fabric is configured with a static ECMP route for 10.255.0.1/32: > nexthop via ip_host1 weight 1 (hosts backend1) > nexthop via ip_host2 weight 1 (hosts backend2) > nexthop via ip_host3 weight 2 (hosts backend3 and backend4) > > As part of testing, following estimates of distribution of requests to > balancers were obtained: > [root@dev11 ~]# for i in $(seq 5000); do curl http://10.255.0.1:80 > 2>/dev/null ; echo ; done | awk '{print $2}' | sort | uniq -c > 1265 “backend 4", > 1260 “backend 3", > 1224 “backend 2", > 1251 “backend 1", > Thus, requests using ecmp balancing are distributed between backends > approximately evenly. 
> > Suggested-by: Vladislav Odintsov <[email protected]> > Signed-off-by: Alexandra Rukomoinikova <[email protected]> > --- > v1 --> v2: split tests for ipv4/6 cases > --- > northd/en-lb-data.c | 8 ++ > northd/en-lb-data.h | 3 + > northd/en-lr-stateful.c | 4 + > northd/en-lr-stateful.h | 2 + > northd/lb.c | 93 ++++++++------- > northd/lb.h | 4 + > northd/northd.c | 169 ++++++++++++++++++++------- > northd/northd.h | 5 + > ovn-nb.xml | 13 ++- > tests/ovn-northd.at | 247 ++++++++++++++++++++++++++++++++++++++++ > tests/system-ovn.at | 245 +++++++++++++++++++++++++++++++++++++++ > 11 files changed, 709 insertions(+), 84 deletions(-) > > diff --git a/northd/en-lb-data.c b/northd/en-lb-data.c > index 6d52d465e..6547a961f 100644 > --- a/northd/en-lb-data.c > +++ b/northd/en-lb-data.c > @@ -166,6 +166,7 @@ lb_data_load_balancer_handler(struct engine_node *node, > void *data) > add_crupdated_lb_to_tracked_data(lb, trk_lb_data, > lb->health_checks); > trk_lb_data->has_routable_lb |= lb->routable; > + trk_lb_data->has_distributed_lb |= lb->is_distributed; > continue; > } > > @@ -180,6 +181,7 @@ lb_data_load_balancer_handler(struct engine_node *node, > void *data) > add_deleted_lb_to_tracked_data(lb, trk_lb_data, > lb->health_checks); > trk_lb_data->has_routable_lb |= lb->routable; > + trk_lb_data->has_distributed_lb |= lb->is_distributed; > } else { > /* Load balancer updated. 
*/ > bool health_checks = lb->health_checks; > @@ -189,11 +191,13 @@ lb_data_load_balancer_handler(struct engine_node *node, > void *data) > sset_swap(&lb->ips_v6, &old_ips_v6); > enum lb_neighbor_responder_mode neigh_mode = lb->neigh_mode; > bool routable = lb->routable; > + bool distributed_mode = lb->is_distributed; > ovn_northd_lb_reinit(lb, tracked_lb); > health_checks |= lb->health_checks; > struct crupdated_lb *clb = add_crupdated_lb_to_tracked_data( > lb, trk_lb_data, health_checks); > trk_lb_data->has_routable_lb |= lb->routable; > + trk_lb_data->has_distributed_lb |= lb->is_distributed; > > /* Determine the inserted and deleted vips and store them in > * the tracked data. */ > @@ -226,6 +230,10 @@ lb_data_load_balancer_handler(struct engine_node *node, > void *data) > /* If neigh_mode is updated trigger a full recompute. */ > return EN_UNHANDLED; > } > + if (distributed_mode != lb->is_distributed) { > + /* If neigh_mode is updated trigger a full recompute. */ > + return EN_UNHANDLED; > + } > } > } > > diff --git a/northd/en-lb-data.h b/northd/en-lb-data.h > index 1da087656..90e85b8c4 100644 > --- a/northd/en-lb-data.h > +++ b/northd/en-lb-data.h > @@ -82,6 +82,9 @@ struct tracked_lb_data { > > /* Indicates if any lb (in the tracked data) has 'routable' flag set. */ > bool has_routable_lb; > + > + /* Indicates if any lb (in the tracked data) has 'distibuted' flag set. > */ > + bool has_distributed_lb; > }; > > /* Datapath (logical switch) to lb/lbgrp association data. 
*/ > diff --git a/northd/en-lr-stateful.c b/northd/en-lr-stateful.c > index 5eec1e11a..55788c06c 100644 > --- a/northd/en-lr-stateful.c > +++ b/northd/en-lr-stateful.c > @@ -325,7 +325,9 @@ lr_stateful_lb_data_handler(struct engine_node *node, > void *data_) > const struct ovn_datapath *od = > ovn_datapaths_find_by_index(input_data.lr_datapaths, > lr_stateful_rec->lr_index); > + > lr_stateful_rec->has_lb_vip = od_has_lb_vip(od); > + lr_stateful_rec->has_distributed_lb = od->is_distributed; > } > > return EN_HANDLED_UPDATED; > @@ -527,7 +529,9 @@ lr_stateful_record_create(struct lr_stateful_table *table, > if (nbr->n_nat) { > lr_stateful_rebuild_vip_nats(lr_stateful_rec); > } > + > lr_stateful_rec->has_lb_vip = od_has_lb_vip(od); > + lr_stateful_rec->has_distributed_lb = od->is_distributed; > > hmap_insert(&table->entries, &lr_stateful_rec->key_node, > uuid_hash(&lr_stateful_rec->nbr_uuid)); > diff --git a/northd/en-lr-stateful.h b/northd/en-lr-stateful.h > index 146f768c3..3b0c54521 100644 > --- a/northd/en-lr-stateful.h > +++ b/northd/en-lr-stateful.h > @@ -59,6 +59,8 @@ struct lr_stateful_record { > > bool has_lb_vip; > > + bool has_distributed_lb; > + > /* Load Balancer vIPs relevant for this datapath. 
*/ > struct ovn_lb_ip_set *lb_ips; > > diff --git a/northd/lb.c b/northd/lb.c > index 919557ec4..7db2ba1fd 100644 > --- a/northd/lb.c > +++ b/northd/lb.c > @@ -85,12 +85,12 @@ ovn_lb_ip_set_clone(struct ovn_lb_ip_set *lb_ip_set) > return clone; > } > > -static > -void ovn_northd_lb_vip_init(struct ovn_northd_lb_vip *lb_vip_nb, > - const struct ovn_lb_vip *lb_vip, > - const struct nbrec_load_balancer *nbrec_lb, > - const char *vip_port_str, const char > *backend_ips, > - bool template) > +static void > +ovn_northd_lb_vip_init(struct ovn_northd_lb_vip *lb_vip_nb, > + const struct ovn_lb_vip *lb_vip, > + const struct nbrec_load_balancer *nbrec_lb, > + const char *vip_port_str, const char *backend_ips, > + bool template) > { > lb_vip_nb->backend_ips = xstrdup(backend_ips); > lb_vip_nb->n_backends = vector_len(&lb_vip->backends); > @@ -101,19 +101,24 @@ void ovn_northd_lb_vip_init(struct ovn_northd_lb_vip > *lb_vip_nb, > } > > /* > - * Initializes health check configuration for load balancer VIP > - * backends. Parses the ip_port_mappings in the format : > - * "ip:logical_port:src_ip[:az_name]". > + * Parses ip_port_mappings in the format : > + * "ip:logical_port[:src_ip][:az_name]". > + * src_ip parameter is optional when distributed mode is enabled, > + * without health checks configured. > * If az_name is present and non-empty, it indicates this is a > * remote service monitor (backend is in another availability zone), > * it should be propogated to another AZ by interconnection processing. > + * This configuration required for health check and distributed working > + * of load_balancer. 
> */ > static void > -ovn_lb_vip_backends_health_check_init(const struct ovn_northd_lb *lb, > - const struct ovn_lb_vip *lb_vip, > - struct ovn_northd_lb_vip *lb_vip_nb) > +ovn_lb_vip_backends_ip_port_mappings_init(const struct ovn_northd_lb *lb, > + const struct ovn_lb_vip *lb_vip, > + struct ovn_northd_lb_vip > *lb_vip_nb) > { > struct ds key = DS_EMPTY_INITIALIZER; > + bool allow_without_src_ip = lb->is_distributed > + && !lb_vip_nb->lb_health_check; > > for (size_t j = 0; j < vector_len(&lb_vip->backends); j++) { > const struct ovn_lb_backend *backend = > @@ -127,16 +132,26 @@ ovn_lb_vip_backends_health_check_init(const struct > ovn_northd_lb *lb, > continue; > } > > - char *svc_mon_src_ip = NULL; > - char *az_name = NULL; > + struct ovn_northd_lb_backend *backend_nb = NULL; > + char *port_name = NULL, *az_name = NULL, *first_colon = NULL; > + char *svc_mon_src_ip = NULL, *src_ip = NULL; > bool is_remote = false; > - char *port_name = xstrdup(s); > - char *src_ip = NULL; > > - char *first_colon = strchr(port_name, ':'); > - if (!first_colon) { > - free(port_name); > - continue; > + port_name = xstrdup(s); > + first_colon = strchr(port_name, ':'); > + if (!first_colon && allow_without_src_ip) { > + if (!*port_name) { > + VLOG_WARN("Empty port name in distributed mode for IP %s", > + ds_cstr(&key)); > + goto cleanup; > + } > + src_ip = NULL; src_ip seems to not be used after init_backend label, so no need to set it here. > + az_name = NULL; > + is_remote = false; since we are setting is_remote to false we do not need to set az_name to NULL here, right? 
> + goto init_backend; > + } else if (!first_colon) { > + VLOG_WARN("Expected ':' separator for: %s", port_name); > + goto cleanup; > } > *first_colon = '\0'; > > @@ -145,8 +160,7 @@ ovn_lb_vip_backends_health_check_init(const struct > ovn_northd_lb *lb, > char *ip_end = strchr(first_colon + 2, ']'); > if (!ip_end) { > VLOG_WARN("Malformed IPv6 address in backend %s", s); > - free(port_name); > - continue; > + goto cleanup; > } > > src_ip = first_colon + 2; > @@ -157,8 +171,7 @@ ovn_lb_vip_backends_health_check_init(const struct > ovn_northd_lb *lb, > if (!*az_name) { > VLOG_WARN("Empty AZ name specified for backend %s", > port_name); > - free(port_name); > - continue; > + goto cleanup; > } > is_remote = true; > } > @@ -172,32 +185,33 @@ ovn_lb_vip_backends_health_check_init(const struct > ovn_northd_lb *lb, > if (!*az_name) { > VLOG_WARN("Empty AZ name specified for backend %s", > port_name); > - free(port_name); > - continue; > + goto cleanup; > } > - is_remote = true; > + is_remote = true; > } > } > > struct sockaddr_storage svc_mon_src_addr; > if (!src_ip || !inet_parse_address(src_ip, &svc_mon_src_addr)) { > VLOG_WARN("Invalid svc mon src IP %s", src_ip ? src_ip : "NULL"); > + goto cleanup; > } else { > struct ds src_ip_s = DS_EMPTY_INITIALIZER; > ss_format_address_nobracks(&svc_mon_src_addr, &src_ip_s); > svc_mon_src_ip = ds_steal_cstr(&src_ip_s); > } > > - if (svc_mon_src_ip) { > - struct ovn_northd_lb_backend *backend_nb = > - &lb_vip_nb->backends_nb[j]; > - backend_nb->health_check = true; > - backend_nb->logical_port = xstrdup(port_name); > - backend_nb->svc_mon_src_ip = svc_mon_src_ip; > - backend_nb->az_name = is_remote ? xstrdup(az_name) : NULL; > - backend_nb->remote_backend = is_remote; > - } > +init_backend: > + backend_nb = &lb_vip_nb->backends_nb[j]; > + backend_nb->health_check = lb_vip_nb->lb_health_check; > + backend_nb->logical_port = xstrdup(port_name); > + backend_nb->svc_mon_src_ip = svc_mon_src_ip; > + backend_nb->az_name = is_remote ? 
xstrdup(az_name) : NULL; > + backend_nb->remote_backend = is_remote; > + backend_nb->distributed_backend = lb->is_distributed ? true : false; just: backend_nb->distributed_backend = lb->is_distributed; > +cleanup: > free(port_name); > + continue; no need to continue here > } > > ds_destroy(&key); > @@ -364,6 +378,9 @@ ovn_northd_lb_init(struct ovn_northd_lb *lb, > lb->hairpin_snat_ip = xstrdup(snat_ip); > } > > + lb->is_distributed = smap_get_bool(&nbrec_lb->options, "distributed", > + false); > + > sset_init(&lb->ips_v4); > sset_init(&lb->ips_v6); > struct smap_node *node; > @@ -403,8 +420,8 @@ ovn_northd_lb_init(struct ovn_northd_lb *lb, > } > n_vips++; > > - if (lb_vip_nb->lb_health_check) { > - ovn_lb_vip_backends_health_check_init(lb, lb_vip, lb_vip_nb); > + if (lb_vip_nb->lb_health_check || lb->is_distributed) { > + ovn_lb_vip_backends_ip_port_mappings_init(lb, lb_vip, lb_vip_nb); > } > } > > diff --git a/northd/lb.h b/northd/lb.h > index 43a8a1850..bd7fe641c 100644 > --- a/northd/lb.h > +++ b/northd/lb.h > @@ -74,6 +74,9 @@ struct ovn_northd_lb { > /* Indicates if the load balancer has health checks configured. */ > bool health_checks; > > + /* Indicates if distributed option is enabled for load balancer. */ > + bool is_distributed; > + > char *hairpin_snat_ip; > }; > > @@ -90,6 +93,7 @@ struct ovn_northd_lb_backend { > bool health_check; > /* Set to true if port does not locate in local AZ. */ > bool remote_backend; > + bool distributed_backend; > /* Logical port to which the ip belong to. */ > char *logical_port; > /* Source IP address to be used for service monitoring. 
*/ > diff --git a/northd/northd.c b/northd/northd.c > index 8b5753f16..3551680b9 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -554,6 +554,7 @@ ovn_datapath_create(struct hmap *datapaths, const struct > uuid *key, > od->localnet_ports = VECTOR_EMPTY_INITIALIZER(struct ovn_port *); > od->lb_with_stateless_mode = false; > od->ipam_info_initialized = false; > + od->is_distributed = false; > od->tunnel_key = sdp->sb_dp->tunnel_key; > init_mcast_info_for_datapath(od); > od->datapath_lflows = lflow_ref_create(); > @@ -3283,6 +3284,54 @@ ovn_lb_svc_create(struct ovsdb_idl_txn *ovnsb_txn, > } > } > > +static bool > +is_backend_available(const struct ovn_northd_lb *lb, > + const struct ovn_lb_backend *backend, > + const struct ovn_northd_lb_backend *backend_nb, > + const struct svc_monitors_map_data *svc_mons_data) > +{ > + const char *protocol = lb->nlb->protocol; > + if (!protocol || !protocol[0]) { > + protocol = "tcp"; > + } > + > + struct service_monitor_info *mon_info = > + get_service_mon(svc_mons_data->local_svc_monitors_map, > + svc_mons_data->ic_learned_svc_monitors_map, > + backend->ip_str, > + backend_nb->logical_port, > + backend->port, > + protocol); > + > + if (!mon_info) { > + return false; > + } > + > + ovs_assert(mon_info->sbrec_mon); > + > + if (mon_info->sbrec_mon->status && > + strcmp(mon_info->sbrec_mon->status, "online")) { > + return false; > + } > + > + return true; > +} > + > +static inline void > +append_lb_backend_to_action(const struct ovn_lb_backend *backend, > + const struct ovn_northd_lb_backend *backend_nb, > + bool distributed_mode, > + struct ds *action) > +{ > + bool ipv6 = !IN6_IS_ADDR_V4MAPPED(&backend->ip); > + > + if (distributed_mode) { > + ds_put_format(action, "\"%s\":", backend_nb->logical_port); > + } > + ds_put_format(action, ipv6 ? 
"[%s]:%"PRIu16"," : "%s:%"PRIu16",", > + backend->ip_str, backend->port); > +} > + > static bool > build_lb_vip_actions(const struct ovn_northd_lb *lb, > const struct ovn_lb_vip *lb_vip, > @@ -3308,9 +3357,11 @@ build_lb_vip_actions(const struct ovn_northd_lb *lb, > } > } > > - if (lb_vip_nb->lb_health_check) { > - ds_put_cstr(action, "ct_lb_mark(backends="); > + ds_put_format(action, "%s", lb->is_distributed > + ? "ct_lb_mark_local(backends=" > + : "ct_lb_mark(backends="); should we move ds_put_format() inside the if () condition? > > + if (lb_vip_nb->lb_health_check || lb->is_distributed) { > size_t i = 0; > size_t n_active_backends = 0; > const struct ovn_lb_backend *backend; > @@ -3318,45 +3369,38 @@ build_lb_vip_actions(const struct ovn_northd_lb *lb, > struct ovn_northd_lb_backend *backend_nb = > &lb_vip_nb->backends_nb[i++]; > > - if (!backend_nb->health_check) { > - continue; > - } > - > - const char *protocol = lb->nlb->protocol; > - if (!protocol || !protocol[0]) { > - protocol = "tcp"; > + /* Validation of cases of incorrect parameter > + * settings at the backend level. */ > + if (lb_vip_nb->lb_health_check && > + !backend_nb->health_check) { > + continue; > } > > - struct service_monitor_info *mon_info = > - get_service_mon(svc_mons_data->local_svc_monitors_map, > - svc_mons_data->ic_learned_svc_monitors_map, > - backend->ip_str, > - backend_nb->logical_port, > - backend->port, > - protocol); > - > - if (!mon_info) { > + if (lb->is_distributed && > + !backend_nb->distributed_backend ) { > continue; > } > > - ovs_assert(mon_info->sbrec_mon); > - if (mon_info->sbrec_mon->status && > - strcmp(mon_info->sbrec_mon->status, "online")) { > + if (backend_nb->health_check && > + !is_backend_available(lb, > + backend, > + backend_nb, > + svc_mons_data)) { > continue; > } > > n_active_backends++; > - bool ipv6 = !IN6_IS_ADDR_V4MAPPED(&backend->ip); > - ds_put_format(action, ipv6 ? 
"[%s]:%"PRIu16"," : "%s:%"PRIu16",", > - backend->ip_str, backend->port); > + append_lb_backend_to_action(backend, > + backend_nb, > + backend_nb->distributed_backend, > + action); > } > ds_chomp(action, ','); > > drop = !n_active_backends && !lb_vip->empty_backend_rej; > reject = !n_active_backends && lb_vip->empty_backend_rej; > } else { > - ds_put_format(action, "ct_lb_mark(backends=%s", > - lb_vip_nb->backend_ips); > + ds_put_format(action, "%s", lb_vip_nb->backend_ips); > } > > if (reject) { > @@ -3393,6 +3437,19 @@ build_lb_vip_actions(const struct ovn_northd_lb *lb, > return reject; > } > > +static inline void > +handle_od_lb_datapath_modes(struct ovn_datapath *od, > + struct ovn_lb_datapaths *lb_dps) > +{ > + if (od->lb_with_stateless_mode) { > + hmapx_add(&lb_dps->ls_lb_with_stateless_mode, od); > + } > + > + if (lb_dps->lb->is_distributed) { > + od->is_distributed = true; > + } > +} > + > static void > build_lb_datapaths(const struct hmap *lbs, const struct hmap *lb_groups, > struct ovn_datapaths *ls_datapaths, > @@ -3435,9 +3492,7 @@ build_lb_datapaths(const struct hmap *lbs, const struct > hmap *lb_groups, > lb_dps = ovn_lb_datapaths_find(lb_datapaths_map, lb_uuid); > ovs_assert(lb_dps); > ovn_lb_datapaths_add_ls(lb_dps, 1, &od, ods_size(ls_datapaths)); > - if (od->lb_with_stateless_mode) { > - hmapx_add(&lb_dps->ls_lb_with_stateless_mode, od); > - } > + handle_od_lb_datapath_modes(od, lb_dps); > } > > for (size_t i = 0; i < od->nbs->n_load_balancer_group; i++) { > @@ -3470,6 +3525,7 @@ build_lb_datapaths(const struct hmap *lbs, const struct > hmap *lb_groups, > &od->nbr->load_balancer[i]->header_.uuid; > lb_dps = ovn_lb_datapaths_find(lb_datapaths_map, lb_uuid); > ovs_assert(lb_dps); > + handle_od_lb_datapath_modes(od, lb_dps); > ovn_lb_datapaths_add_lr(lb_dps, 1, &od, ods_size(lr_datapaths)); > } > } > @@ -3824,6 +3880,7 @@ sync_pb_for_lrp(struct ovn_port *op, > > bool always_redirect = > !lr_stateful_rec->lrnat_rec->has_distributed_nat && > + 
!lr_stateful_rec->has_distributed_lb && > !l3dgw_port_has_associated_vtep_lports(op->primary_port); > > const char *redirect_type = smap_get(&op->nbrp->options, > @@ -5404,10 +5461,7 @@ northd_handle_lb_data_changes(struct tracked_lb_data > *trk_lb_data, > lb_dps = ovn_lb_datapaths_find(lb_datapaths_map, > &uuidnode->uuid); > ovs_assert(lb_dps); > ovn_lb_datapaths_add_ls(lb_dps, 1, &od, ods_size(ls_datapaths)); > - > - if (od->lb_with_stateless_mode) { > - hmapx_add(&lb_dps->ls_lb_with_stateless_mode, od); > - } > + handle_od_lb_datapath_modes(od, lb_dps); > > /* Add the lb to the northd tracked data. */ > hmapx_add(&nd_changes->trk_lbs.crupdated, lb_dps); > @@ -5446,6 +5500,7 @@ northd_handle_lb_data_changes(struct tracked_lb_data > *trk_lb_data, > lb_dps = ovn_lb_datapaths_find(lb_datapaths_map, > &uuidnode->uuid); > ovs_assert(lb_dps); > ovn_lb_datapaths_add_lr(lb_dps, 1, &od, ods_size(lr_datapaths)); > + handle_od_lb_datapath_modes(od, lb_dps); > > /* Add the lb to the northd tracked data. */ > hmapx_add(&nd_changes->trk_lbs.crupdated, lb_dps); > @@ -10688,8 +10743,13 @@ build_lswitch_ip_unicast_lookup(struct ovn_port *op, > : debug_drop_action(); > > if (lsp_is_router(op->nbsp) && op->peer && op->peer->nbrp) { > + /* Distributed gateway ports default to centralized mode. > + * They operate in distributed mode only when configured > + * on their bound router. */ > + bool peer_lrp_is_centralized = !op->peer->od->is_distributed; > + > /* For ports connected to logical routers add flows to bypass the > - * broadcast flooding of ARP/ND requests in table 19. We direct the > + * broadcast flooding of ARP/ND requests in table 22. We direct the > * requests only to the router port that owns the IP address. 
> */ > build_lswitch_rport_arp_req_flows(op->peer, op->od, op, lflows, > @@ -10704,7 +10764,8 @@ build_lswitch_ip_unicast_lookup(struct ovn_port *op, > ds_put_format(match, "eth.dst == %s", > op->peer->lrp_networks.ea_s); > } > > - if (!vector_is_empty(&op->peer->od->l3dgw_ports) && > + if (peer_lrp_is_centralized && > + !vector_is_empty(&op->peer->od->l3dgw_ports) && > !vector_is_empty(&op->od->localnet_ports)) { > add_lrp_chassis_resident_check(op->peer, match); > } else if (op->cr_port) { > @@ -12438,6 +12499,14 @@ build_distr_lrouter_nat_flows_for_lb(struct > lrouter_nat_lb_flows_ctx *ctx, > size_t new_match_len = ctx->new_match->length; > size_t undnat_match_len = ctx->undnat_match->length; > > + bool lb_is_centralized = !ctx->lb->is_distributed; > + > + /* If load balancer is distributed, then the response traffic > + * must be returned through the distributed port.*/ > + const char *gw_outport = lb_is_centralized ? > + dgp->cr_port->json_key : > + dgp->json_key; nit: const char *gw_outport = lb_is_centralized ? dgp->cr_port->json_key : dgp->json_key; > + > /* (NOTE) dnat_action: Add the first LB backend IP as a destination > * action of the lr_in_dnat NAT rule. Including the backend IP is useful > * for accepting packets coming from a chassis that does not have > @@ -12472,8 +12541,9 @@ build_distr_lrouter_nat_flows_for_lb(struct > lrouter_nat_lb_flows_ctx *ctx, > meter = copp_meter_get(COPP_REJECT, od->nbr->copp, > ctx->meter_groups); > } > > - if (!vector_is_empty(&ctx->lb_vip->backends) || > - !ctx->lb_vip->empty_backend_rej) { > + if (lb_is_centralized > + && (!vector_is_empty(&ctx->lb_vip->backends) || > + !ctx->lb_vip->empty_backend_rej)) { nit: please align the ! 
condtions: if (lb_is_centralized && (!vector_is_empty() || !ctx->lb_vip->empty_backend_rej)) { } > ds_put_format(ctx->new_match, " && is_chassis_resident(%s)", > dgp->cr_port->json_key); > } > @@ -12520,8 +12590,8 @@ build_distr_lrouter_nat_flows_for_lb(struct > lrouter_nat_lb_flows_ctx *ctx, > */ > ds_put_format(ctx->undnat_match, ") && outport == %s", dgp->json_key); > ds_clear(ctx->gw_redir_action); > - ds_put_format(ctx->gw_redir_action, "outport = %s; next;", > - dgp->cr_port->json_key); > + ds_put_format(ctx->gw_redir_action, > + "outport = %s; next;", gw_outport); > > ovn_lflow_add_with_hint(ctx->lflows, od, S_ROUTER_IN_GW_REDIRECT, > 200, ds_cstr(ctx->undnat_match), > @@ -12530,9 +12600,14 @@ build_distr_lrouter_nat_flows_for_lb(struct > lrouter_nat_lb_flows_ctx *ctx, > lflow_ref); > ds_truncate(ctx->undnat_match, undnat_match_len); > > - ds_put_format(ctx->undnat_match, ") && (inport == %s || outport == %s)" > - " && is_chassis_resident(%s)", dgp->json_key, > dgp->json_key, > - dgp->cr_port->json_key); > + ds_put_format(ctx->undnat_match, ") && (inport == %s || outport == %s)", > + dgp->json_key, dgp->json_key); > + > + if (lb_is_centralized) { > + ds_put_format(ctx->undnat_match, " && is_chassis_resident(%s)", > + dgp->cr_port->json_key); > + } > + > /* Use the LB protocol as matching criteria for out undnat and snat when > * creating LBs with stateless NAT. */ > if (stateless_nat) { > @@ -13855,6 +13930,10 @@ build_gateway_mtu_flow(struct lflow_table *lflows, > struct ovn_port *op, > static bool > consider_l3dgw_port_is_centralized(struct ovn_port *op) > { > + if (op->od->is_distributed) { > + return false; > + } > + > if (l3dgw_port_has_associated_vtep_lports(op)) { > return false; > } > @@ -16084,7 +16163,7 @@ build_ipv6_input_flows_for_lrouter_port( > * router's own IP address. 
*/ > for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) { > ds_clear(match); > - if (lrp_is_l3dgw(op)) { > + if (lrp_is_l3dgw(op) && !op->od->is_distributed) { > /* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s > * should only be sent from the gateway chassi, so that > * upstream MAC learning points to the gateway chassis. > @@ -16361,13 +16440,15 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op, > > /* ARP reply. These flows reply to ARP requests for the router's own > * IP address. */ > + bool od_distributed = op->od->is_distributed; > for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { > ds_clear(match); > ds_put_format(match, "arp.spa == %s/%u", > op->lrp_networks.ipv4_addrs[i].network_s, > op->lrp_networks.ipv4_addrs[i].plen); > > - if (!vector_is_empty(&op->od->l3dgw_ports) && op->peer > + if (!od_distributed && > + !vector_is_empty(&op->od->l3dgw_ports) && op->peer > && !vector_is_empty(&op->peer->od->localnet_ports)) { > add_lrp_chassis_resident_check(op, match); > } > diff --git a/northd/northd.h b/northd/northd.h > index 32134d36e..b87855310 100644 > --- a/northd/northd.h > +++ b/northd/northd.h > @@ -450,6 +450,11 @@ struct ovn_datapath { > /* Indicates that the LS has valid vni associated with it. */ > bool has_evpn_vni; > > + /* True if datapath has some distributed dependencies. > + * Currently, this only applies to load balancers attached to datapath > + * with distributed mode enabled. */ > + bool is_distributed; > + > /* OVN northd only needs to know about logical router gateway ports for > * NAT/LB on a distributed router. The "distributed gateway ports" are > * populated only when there is a gateway chassis or ha chassis group > diff --git a/ovn-nb.xml b/ovn-nb.xml > index b5fe44e53..c6cd6dccd 100644 > --- a/ovn-nb.xml > +++ b/ovn-nb.xml > @@ -2378,13 +2378,15 @@ > <p> > Maps from endpoint IP to a colon-separated pair of logical port > name > and source IP, > - e.g. 
<code><var>port_name</var>:<var>sourc_ip</var></code> for > IPv4. > + e.g. <code><var>port_name</var>:<var>source_ip</var></code> for > IPv4. > Health checks are sent to this port with the specified source IP. > For IPv6 square brackets must be used around IP address, e.g: > - <code><var>port_name</var>:<var>[sourc_ip]</var></code> > + <code><var>port_name</var>:<var>[source_ip]</var></code> > Remote endpoint: > Specify :target_zone_name at the end of the above syntax to create > remote health checks in a specific zone. > + For distributed load balancers - ip_port_mappings is required. > + In the absence of health checks - source_ip is optional. > </p> > > <p> > @@ -2587,6 +2589,13 @@ or > traffic may be dropped in scenarios where we have different chassis > for each DGP. This option is set to <code>false</code> by default. > </column> > + > + <column name="options" key="distributed"> > + Enabling this option distributes load balancing across compute nodes, > + where traffic is routed only to local backends. To ensure proper > + operation, you must configure <ref column="ip_port_mappings"/> first. > + </column> > + > </group> > </table> > > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at > index 15bc82d58..5d6ea75a6 100644 > --- a/tests/ovn-northd.at > +++ b/tests/ovn-northd.at > @@ -17872,6 +17872,9 @@ ovn_start > # ip_port_mappings syntax: ip:lport_name:src_ip:<az_name>(for remote lports) > > check ovn-nbctl ls-add ls1 > +check ovn-nbctl lr-add lr1 > + > +ovn-appctl -t ovn-northd vlog/disable-rate-limit > > check ovn-nbctl lb-add lb1_ipv4 1.1.1.1:80 > 192.168.0.1:10880,192.168.0.2:10880,192.168.0.3:10880 > AT_CHECK([ovn-nbctl --wait=sb \ > @@ -17941,6 +17944,47 @@ check ovn-nbctl set load_balancer lb1_ipv4 > ip_port_mappings:192.168.0.1=lport1:1 > check_row_count sb:Service_Monitor 0 > > OVS_WAIT_UNTIL([grep "Empty AZ name specified" northd/ovn-northd.log]) > + > +check ovn-nbctl lb-del lb1_ipv4 > + > +# Check correct setup of distributed load balancers. 
> +echo > northd/ovn-northd.log > +check ovn-nbctl lb-add lb_distubuted 1.1.1.1:80 > 192.168.0.1:10880,192.168.0.2:10880 > +check ovn-nbctl lr-lb-add lr1 lb_distubuted > +check ovn-nbctl set load_balancer lb_distubuted options:distributed=true > + > +# Check that load balancer does not work in a distributed mode - there is no > ip_port_mappings setting > +ovn-sbctl lflow-list lr1 > lr1_lflow > +AT_CHECK([cat lr1_lflow | grep lr_in_dnat | grep priority=120 | > ovn_strip_lflows], [0], [dnl > + table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel && > ip4 && ip4.dst == 1.1.1.1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80), > action=(drop;) > +]) > + > +# Check that the load balancer has only one backend available since the only > one backend has ip_port_mappings > +check ovn-nbctl set load_balancer lb_distubuted > ip_port_mappings:192.168.0.1=lport1 > +ovn-sbctl lflow-list lr1 > lr1_lflow > +AT_CHECK([cat lr1_lflow | grep lr_in_dnat | grep priority=120 | > ovn_strip_lflows], [0], [dnl > + table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel && > ip4 && ip4.dst == 1.1.1.1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80), > action=(ct_lb_mark_local(backends="lport1":192.168.0.1:10880);) > +]) > + > +check ovn-nbctl set load_balancer lb_distubuted > ip_port_mappings:192.168.0.2=lport2 > +ovn-sbctl lflow-list lr1 > lr1_lflow > +AT_CHECK([cat lr1_lflow | grep lr_in_dnat | grep priority=120 | > ovn_strip_lflows], [0], [dnl > + table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel && > ip4 && ip4.dst == 1.1.1.1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80), > action=(ct_lb_mark_local(backends="lport1":192.168.0.1:10880,"lport2":192.168.0.2:10880);) > +]) > + > +# Check if health check is configured, ip_port_mappings must be provided. 
> +AT_CHECK([ovn-nbctl --wait=sb \ > + -- --id=@hc create Load_Balancer_Health_Check vip="1.1.1.1\:80" \ > + options:failure_count=100 \ > + -- add Load_Balancer lb_distubuted health_check @hc | uuidfilt], > [0], [<0> > +]) > + > +ovn-sbctl lflow-list lr1 > lr1_lflow > +OVS_WAIT_UNTIL([grep "Expected ':' separator for:" northd/ovn-northd.log]) > +AT_CHECK([cat lr1_lflow | grep lr_in_dnat | grep priority=120 | > ovn_strip_lflows], [0], [dnl > + table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel && > ip4 && ip4.dst == 1.1.1.1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80), > action=(drop;) > +]) > + > AT_CLEANUP > ]) > > @@ -18452,6 +18496,209 @@ AT_CHECK( > AT_CLEANUP > ]) > > +OVN_FOR_EACH_NORTHD_NO_HV([ > +AT_SETUP([Distributed lb: logical-flow test - IPv4/IPv6 case]) > +ovn_start > + > +# (1) Create two load balancers, IPv4 and IPv6, attach them to a router that > has a distributed gateway port. > +# (2) Set the gateway to an existing gateway - verify that all router flows > are centralized (arp/nd). > +# (3) Change the gateway to a non-existent one, make one load balancer > distributed - verify that all router flows for the router become distributed. > +# (4) Verify that flows for the distributed load balancer are distributed, > and for the second load balancer are centralized. > +# (5) Make the second load balancer distributed, verify its flows. > +# (6) Remove the option from one load balancer, verify that the logic is > maintained that if at least one load balancer has the option - the entire > router is distributed. 
> + > +check ovn-nbctl ls-add outside > + > +check ovn-nbctl lsp-add outside outside \ > + -- lsp-set-addresses outside unknown \ > + -- lsp-set-type outside localnet > + > +check ovn-nbctl --wait=sb set Logical_Switch_Port outside tag_request=2 > + > +check ovn-nbctl lsp-add outside outside-down \ > + -- lsp-set-type outside-down router \ > + -- lsp-set-addresses outside-down router \ > + -- lsp-set-options outside-down router-port=lr1-up > + > +check ovn-nbctl lr-add lr1 \ > + -- lrp-add lr1 lr1-up 11:11:11:11:11:11 169.254.0.1/24 > 2001:db8:abcd:0002::bad/64 \ > + -- lrp-add lr1 lr1-down 12:12:12:12:12:12 192.168.0.1/24 > 2001:db8:abcd:0001::c0fe/64 > + > +check ovn-nbctl ls-add ls1 \ > + -- lsp-add ls1 lport1 \ > + -- lsp-set-addresses lport1 "13:13:13:13:13:13 192.168.0.101" \ > + -- lsp-add ls1 lport2 \ > + -- lsp-set-addresses lport2 "14:14:14:14:14:14 192.168.0.102" > + > +check ovn-nbctl lsp-add ls1 ls1-up \ > + -- lsp-set-type ls1-up router \ > + -- lsp-set-addresses ls1-up router \ > + -- lsp-set-options ls1-up router-port=lr1-down > + > +check ovn-nbctl --wait=sb sync > + > +check ovn-nbctl ha-chassis-group-add gateway > +check ovn-nbctl ha-chassis-group-add-chassis gateway hv1 1 > +ha_g_uuid=$(fetch_column nb:HA_Chassis_Group _uuid name=gateway) > +lr1_up_uuid=$(fetch_column nb:Logical_Router_Port _uuid name=lr1-up) > +check ovn-nbctl set logical_router_port $lr1_up_uuid > ha_chassis_group=$ha_g_uuid > + > +check ovn-nbctl --wait=sb sync > + > +check ovn-nbctl lb-add lb1_ipv4 1.1.1.1:80 > 192.168.0.101:10880,192.168.0.102:10880 > +check ovn-nbctl set Load_Balancer lb1_ipv4 > ip_port_mappings:192.168.0.101=lport1:192.168.0.199 > +check ovn-nbctl set Load_Balancer lb1_ipv4 > ip_port_mappings:192.168.0.102=lport2:192.168.0.199 > +check ovn-nbctl lr-lb-add lr1 lb1_ipv4 > +check ovn-nbctl --wait=sb sync > + > +check ovn-nbctl lb-add lb1_ipv6 [[2000::1]]:80 [[2001:db8:abcd:1::2]]:10882 > +check ovn-nbctl set Load_Balancer lb1_ipv6 > 
ip_port_mappings:\"[[2001:db8:abcd:1::2]]\"=\"lport1\" > +check ovn-nbctl lr-lb-add lr1 lb1_ipv6 > +check ovn-nbctl --wait=sb sync > + > +ovn-sbctl lflow-list lr1 > lr1_lflows_before > +ovn-sbctl lflow-list outside > outside_lflows_before > + > +AT_CHECK([cat outside_lflows_before | grep ls_in_l2_lkup | grep priority=50 > | ovn_strip_lflows], [0], [dnl > + table=??(ls_in_l2_lkup ), priority=50 , match=(eth.dst == > 11:11:11:11:11:11 && is_chassis_resident("cr-lr1-up")), action=(outport = > "outside-down"; output;) > +]) > + > +AT_CHECK([cat lr1_lflows_before | grep lr_in_ip_input | grep priority=90 | > grep 169.254.0.1 | ovn_strip_lflows], [0], [dnl > + table=??(lr_in_ip_input ), priority=90 , match=(inport == "lr1-up" > && arp.op == 1 && arp.tpa == 169.254.0.1 && arp.spa == 169.254.0.0/24 && > is_chassis_resident("cr-lr1-up")), action=(eth.dst = eth.src; eth.src = > xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha = > xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1; > output;) > + table=??(lr_in_ip_input ), priority=90 , match=(ip4.dst == > 169.254.0.1 && icmp4.type == 8 && icmp4.code == 0), action=(ip4.dst <-> > ip4.src; ip.ttl = 255; icmp4.type = 0; flags.loopback = 1; next; ) > +]) > + > +AT_CHECK([cat lr1_lflows_before | grep lr_in_ip_input | grep priority=90 | > grep 2001:db8:abcd:2::bad | ovn_strip_lflows], [0], [dnl > + table=??(lr_in_ip_input ), priority=90 , match=(inport == "lr1-up" > && ip6.dst == {2001:db8:abcd:2::bad, ff02::1:ff00:bad} && nd_ns && nd.target > == 2001:db8:abcd:2::bad && is_chassis_resident("cr-lr1-up")), > action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll > = xreg0[[0..47]]; outport = inport; flags.loopback = 1; output; };) > + table=??(lr_in_ip_input ), priority=90 , match=(ip6.dst == > {2001:db8:abcd:2::bad, fe80::1311:11ff:fe11:1111} && icmp6.type == 128 && > icmp6.code == 0), action=(ip6.dst <-> ip6.src; ip.ttl = 255; icmp6.type = > 129; flags.loopback = 1; 
next; ) > +]) > + > +AT_CHECK([cat lr1_lflows_before | grep lr_in_admission | grep priority=50 | > ovn_strip_lflows], [0], [dnl > + table=??(lr_in_admission ), priority=50 , match=(eth.dst == > 11:11:11:11:11:11 && inport == "lr1-up" && is_chassis_resident("cr-lr1-up")), > action=(xreg0[[0..47]] = 11:11:11:11:11:11; next;) > + table=??(lr_in_admission ), priority=50 , match=(eth.dst == > 12:12:12:12:12:12 && inport == "lr1-down"), action=(xreg0[[0..47]] = > 12:12:12:12:12:12; next;) > + table=??(lr_in_admission ), priority=50 , match=(eth.mcast && inport > == "lr1-down"), action=(xreg0[[0..47]] = 12:12:12:12:12:12; next;) > + table=??(lr_in_admission ), priority=50 , match=(eth.mcast && inport > == "lr1-up"), action=(xreg0[[0..47]] = 11:11:11:11:11:11; next;) > +]) > + > +AT_CHECK([cat lr1_lflows_before | grep lr_out_undnat | grep priority=120 | > ovn_strip_lflows], [0], [dnl > + table=??(lr_out_undnat ), priority=120 , match=(ip4 && ((ip4.src == > 192.168.0.101 && tcp.src == 10880) || (ip4.src == 192.168.0.102 && tcp.src == > 10880)) && (inport == "lr1-up" || outport == "lr1-up") && > is_chassis_resident("cr-lr1-up")), action=(ct_dnat;) > + table=??(lr_out_undnat ), priority=120 , match=(ip6 && ((ip6.src == > 2001:db8:abcd:1::2 && tcp.src == 10882)) && (inport == "lr1-up" || outport == > "lr1-up") && is_chassis_resident("cr-lr1-up")), action=(ct_dnat;) > +]) > + > +AT_CHECK([cat lr1_lflows_before | grep lr_in_gw_redirect | > ovn_strip_lflows], [0], [dnl > + table=??(lr_in_gw_redirect ), priority=0 , match=(1), action=(next;) > + table=??(lr_in_gw_redirect ), priority=200 , match=(ip4 && ((ip4.src == > 192.168.0.101 && tcp.src == 10880) || (ip4.src == 192.168.0.102 && tcp.src == > 10880)) && outport == "lr1-up"), action=(outport = "cr-lr1-up"; next;) > + table=??(lr_in_gw_redirect ), priority=200 , match=(ip6 && ((ip6.src == > 2001:db8:abcd:1::2 && tcp.src == 10882)) && outport == "lr1-up"), > action=(outport = "cr-lr1-up"; next;) > + table=??(lr_in_gw_redirect ), 
priority=50 , match=(outport == > "lr1-up"), action=(outport = "cr-lr1-up"; next;) > +]) > + > +AT_CHECK([cat lr1_lflows_before | grep lr_in_dnat | grep priority=120 | > ovn_strip_lflows], [0], [dnl > + table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel && > ip4 && ip4.dst == 1.1.1.1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80 && > is_chassis_resident("cr-lr1-up")), > action=(ct_lb_mark(backends=192.168.0.101:10880,192.168.0.102:10880);) > + table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel && > ip6 && ip6.dst == 2000::1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80 && > is_chassis_resident("cr-lr1-up")), > action=(ct_lb_mark(backends=[[2001:db8:abcd:1::2]]:10882);) > +]) > + > +check ovn-nbctl clear logical_router_port $lr1_up_uuid ha_chassis_group > +check ovn-nbctl ha-chassis-group-del gateway > +check ovn-nbctl ha-chassis-group-add gateway2 > +check ovn-nbctl ha-chassis-group-add-chassis gateway2 test 1 > +ha_g_uuid=$(fetch_column nb:HA_Chassis_Group _uuid name=gateway2) > +lr1_up_uuid=$(fetch_column nb:Logical_Router_Port _uuid name=lr1-up) > +check ovn-nbctl set logical_router_port $lr1_up_uuid > ha_chassis_group=$ha_g_uuid > + > +check ovn-nbctl set load_balancer lb1_ipv4 options:distributed=true > +check ovn-nbctl --wait=hv sync > + > +ovn-sbctl lflow-list outside > outside_lflows_after > +ovn-sbctl lflow-list lr1 > lr1_lflows_after > + > +AT_CHECK([cat outside_lflows_after | grep ls_in_l2_lkup | grep priority=50 | > ovn_strip_lflows], [0], [dnl > + table=??(ls_in_l2_lkup ), priority=50 , match=(eth.dst == > 11:11:11:11:11:11), action=(outport = "outside-down"; output;) > +]) > + > +AT_CHECK([cat lr1_lflows_after | grep lr_in_ip_input | grep priority=90 | > grep 169.254.0.1 | ovn_strip_lflows], [0], [dnl > + table=??(lr_in_ip_input ), priority=90 , match=(inport == "lr1-up" > && arp.op == 1 && arp.tpa == 169.254.0.1 && arp.spa == 169.254.0.0/24), > action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply > */ 
arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport > = inport; flags.loopback = 1; output;) > + table=??(lr_in_ip_input ), priority=90 , match=(ip4.dst == > 169.254.0.1 && icmp4.type == 8 && icmp4.code == 0), action=(ip4.dst <-> > ip4.src; ip.ttl = 255; icmp4.type = 0; flags.loopback = 1; next; ) > +]) > + > +AT_CHECK([cat lr1_lflows_after | grep lr_in_ip_input | grep priority=90 | > grep 2001:db8:abcd:2::bad | ovn_strip_lflows], [0], [dnl > + table=??(lr_in_ip_input ), priority=90 , match=(inport == "lr1-up" > && ip6.dst == {2001:db8:abcd:2::bad, ff02::1:ff00:bad} && nd_ns && nd.target > == 2001:db8:abcd:2::bad), action=(nd_na_router { eth.src = xreg0[[0..47]]; > ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; > flags.loopback = 1; output; };) > + table=??(lr_in_ip_input ), priority=90 , match=(ip6.dst == > {2001:db8:abcd:2::bad, fe80::1311:11ff:fe11:1111} && icmp6.type == 128 && > icmp6.code == 0), action=(ip6.dst <-> ip6.src; ip.ttl = 255; icmp6.type = > 129; flags.loopback = 1; next; ) > +]) > + > +AT_CHECK([cat lr1_lflows_after | grep lr_in_admission | grep priority=50 | > ovn_strip_lflows], [0], [dnl > + table=??(lr_in_admission ), priority=50 , match=(eth.dst == > 11:11:11:11:11:11 && inport == "lr1-up"), action=(xreg0[[0..47]] = > 11:11:11:11:11:11; next;) > + table=??(lr_in_admission ), priority=50 , match=(eth.dst == > 12:12:12:12:12:12 && inport == "lr1-down"), action=(xreg0[[0..47]] = > 12:12:12:12:12:12; next;) > + table=??(lr_in_admission ), priority=50 , match=(eth.mcast && inport > == "lr1-down"), action=(xreg0[[0..47]] = 12:12:12:12:12:12; next;) > + table=??(lr_in_admission ), priority=50 , match=(eth.mcast && inport > == "lr1-up"), action=(xreg0[[0..47]] = 11:11:11:11:11:11; next;) > +]) > + > +AT_CHECK([cat lr1_lflows_after | grep lr_out_undnat | grep priority=120 | > ovn_strip_lflows], [0], [dnl > + table=??(lr_out_undnat ), priority=120 , match=(ip4 && ((ip4.src == > 192.168.0.101 && tcp.src == 10880) || 
(ip4.src == 192.168.0.102 && tcp.src == > 10880)) && (inport == "lr1-up" || outport == "lr1-up")), action=(ct_dnat;) > + table=??(lr_out_undnat ), priority=120 , match=(ip6 && ((ip6.src == > 2001:db8:abcd:1::2 && tcp.src == 10882)) && (inport == "lr1-up" || outport == > "lr1-up") && is_chassis_resident("cr-lr1-up")), action=(ct_dnat;) > +]) > + > +AT_CHECK([cat lr1_lflows_after | grep lr_in_gw_redirect | ovn_strip_lflows], > [0], [dnl > + table=??(lr_in_gw_redirect ), priority=0 , match=(1), action=(next;) > + table=??(lr_in_gw_redirect ), priority=200 , match=(ip4 && ((ip4.src == > 192.168.0.101 && tcp.src == 10880) || (ip4.src == 192.168.0.102 && tcp.src == > 10880)) && outport == "lr1-up"), action=(outport = "lr1-up"; next;) > + table=??(lr_in_gw_redirect ), priority=200 , match=(ip6 && ((ip6.src == > 2001:db8:abcd:1::2 && tcp.src == 10882)) && outport == "lr1-up"), > action=(outport = "cr-lr1-up"; next;) > + table=??(lr_in_gw_redirect ), priority=50 , match=(outport == > "lr1-up"), action=(outport = "cr-lr1-up"; next;) > +]) > + > +AT_CHECK([cat lr1_lflows_after | grep lr_in_dnat | grep priority=120 | > ovn_strip_lflows], [0], [dnl > + table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel && > ip4 && ip4.dst == 1.1.1.1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80), > action=(ct_lb_mark_local(backends="lport1":192.168.0.101:10880,"lport2":192.168.0.102:10880);) > + table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel && > ip6 && ip6.dst == 2000::1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80 && > is_chassis_resident("cr-lr1-up")), > action=(ct_lb_mark(backends=[[2001:db8:abcd:1::2]]:10882);) > +]) > + > +check ovn-nbctl set load_balancer lb1_ipv6 options:distributed=true > +check ovn-nbctl --wait=hv sync > + > +ovn-sbctl lflow-list outside > outside_lflows_after > +ovn-sbctl lflow-list lr1 > lr1_lflows_after > + > +AT_CHECK([cat lr1_lflows_after | grep lr_out_undnat | grep priority=120 | > ovn_strip_lflows], [0], [dnl > + 
table=??(lr_out_undnat ), priority=120 , match=(ip4 && ((ip4.src == > 192.168.0.101 && tcp.src == 10880) || (ip4.src == 192.168.0.102 && tcp.src == > 10880)) && (inport == "lr1-up" || outport == "lr1-up")), action=(ct_dnat;) > + table=??(lr_out_undnat ), priority=120 , match=(ip6 && ((ip6.src == > 2001:db8:abcd:1::2 && tcp.src == 10882)) && (inport == "lr1-up" || outport == > "lr1-up")), action=(ct_dnat;) > +]) > + > +AT_CHECK([cat lr1_lflows_after | grep lr_in_gw_redirect | ovn_strip_lflows], > [0], [dnl > + table=??(lr_in_gw_redirect ), priority=0 , match=(1), action=(next;) > + table=??(lr_in_gw_redirect ), priority=200 , match=(ip4 && ((ip4.src == > 192.168.0.101 && tcp.src == 10880) || (ip4.src == 192.168.0.102 && tcp.src == > 10880)) && outport == "lr1-up"), action=(outport = "lr1-up"; next;) > + table=??(lr_in_gw_redirect ), priority=200 , match=(ip6 && ((ip6.src == > 2001:db8:abcd:1::2 && tcp.src == 10882)) && outport == "lr1-up"), > action=(outport = "lr1-up"; next;) > + table=??(lr_in_gw_redirect ), priority=50 , match=(outport == > "lr1-up"), action=(outport = "cr-lr1-up"; next;) > +]) > + > +AT_CHECK([cat lr1_lflows_after | grep lr_in_dnat | grep priority=120 | > ovn_strip_lflows], [0], [dnl > + table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel && > ip4 && ip4.dst == 1.1.1.1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80), > action=(ct_lb_mark_local(backends="lport1":192.168.0.101:10880,"lport2":192.168.0.102:10880);) > + table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel && > ip6 && ip6.dst == 2000::1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80), > action=(ct_lb_mark_local(backends="lport1":[[2001:db8:abcd:1::2]]:10882);) > +]) > + > +check ovn-nbctl set load_balancer lb1_ipv6 options:distributed=false > +check ovn-nbctl --wait=hv sync > + > +AT_CHECK([cat outside_lflows_after | grep ls_in_l2_lkup | grep priority=50 | > ovn_strip_lflows], [0], [dnl > + table=??(ls_in_l2_lkup ), priority=50 , match=(eth.dst == > 
11:11:11:11:11:11), action=(outport = "outside-down"; output;) > +]) > + > +AT_CHECK([cat lr1_lflows_after | grep lr_in_ip_input | grep priority=90 | > grep 169.254.0.1 | ovn_strip_lflows], [0], [dnl > + table=??(lr_in_ip_input ), priority=90 , match=(inport == "lr1-up" > && arp.op == 1 && arp.tpa == 169.254.0.1 && arp.spa == 169.254.0.0/24), > action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply > */ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport > = inport; flags.loopback = 1; output;) > + table=??(lr_in_ip_input ), priority=90 , match=(ip4.dst == > 169.254.0.1 && icmp4.type == 8 && icmp4.code == 0), action=(ip4.dst <-> > ip4.src; ip.ttl = 255; icmp4.type = 0; flags.loopback = 1; next; ) > +]) > + > +AT_CHECK([cat lr1_lflows_after | grep lr_in_ip_input | grep priority=90 | > grep 2001:db8:abcd:2::bad | ovn_strip_lflows], [0], [dnl > + table=??(lr_in_ip_input ), priority=90 , match=(inport == "lr1-up" > && ip6.dst == {2001:db8:abcd:2::bad, ff02::1:ff00:bad} && nd_ns && nd.target > == 2001:db8:abcd:2::bad), action=(nd_na_router { eth.src = xreg0[[0..47]]; > ip6.src = nd.target; nd.tll = xreg0[[0..47]]; outport = inport; > flags.loopback = 1; output; };) > + table=??(lr_in_ip_input ), priority=90 , match=(ip6.dst == > {2001:db8:abcd:2::bad, fe80::1311:11ff:fe11:1111} && icmp6.type == 128 && > icmp6.code == 0), action=(ip6.dst <-> ip6.src; ip.ttl = 255; icmp6.type = > 129; flags.loopback = 1; next; ) > +]) > + > +AT_CHECK([cat lr1_lflows_after | grep lr_in_admission | grep priority=50 | > ovn_strip_lflows], [0], [dnl > + table=??(lr_in_admission ), priority=50 , match=(eth.dst == > 11:11:11:11:11:11 && inport == "lr1-up"), action=(xreg0[[0..47]] = > 11:11:11:11:11:11; next;) > + table=??(lr_in_admission ), priority=50 , match=(eth.dst == > 12:12:12:12:12:12 && inport == "lr1-down"), action=(xreg0[[0..47]] = > 12:12:12:12:12:12; next;) > + table=??(lr_in_admission ), priority=50 , match=(eth.mcast && inport > == 
"lr1-down"), action=(xreg0[[0..47]] = 12:12:12:12:12:12; next;) > + table=??(lr_in_admission ), priority=50 , match=(eth.mcast && inport > == "lr1-up"), action=(xreg0[[0..47]] = 11:11:11:11:11:11; next;) > +]) > + > +AT_CLEANUP > +]) > + > OVN_FOR_EACH_NORTHD_NO_HV([ > AT_SETUP([Unicast ARP flows]) > ovn_start > diff --git a/tests/system-ovn.at b/tests/system-ovn.at > index 373a87657..8bd3a3b2b 100644 > --- a/tests/system-ovn.at > +++ b/tests/system-ovn.at > @@ -18976,3 +18976,248 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d > /.*terminating with signal 15.*/d"]) > AT_CLEANUP > ]) > + > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([Distributed Load Balancer: IPv4 case]) > +AT_KEYWORDS([ovnlb]) > + > +# Simple Test for Basic Functionality Verification: > +# client - br-ext - br-int - outside-switch - (distributed port) lr - backend > +# test case: > +# 1. Create 2 centralized load balancer(ipv4/ipv6), specifying gateway > chassis for router. > +# 2. Moving gateway to a non-existent chassis. > +# 3. Enable distributed option on load balancer. > +# 4. The distributed load balancer is expected to continue working. > + > +CHECK_CONNTRACK() > +CHECK_CONNTRACK_NAT() > + > +ovn_start > +OVS_TRAFFIC_VSWITCHD_START() > +ADD_BR([br-int]) > +ADD_BR([br-ext]) > + > +check ovs-ofctl add-flow br-ext action=normal > +# Set external-ids in br-int needed for ovn-controller > +check ovs-vsctl \ > + -- set Open_vSwitch . external-ids:system-id=hv1 \ > + -- set Open_vSwitch . > external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ > + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ > + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ > + -- set bridge br-int fail-mode=secure > other-config:disable-in-band=true \ > + -- set Open_vSwitch . 
external-ids:ovn-bridge-mappings=phynet:br-ext > + > +# Start ovn-controller > +start_daemon ovn-controller > + > +check ovn-nbctl lr-add lr > +check ovn-nbctl ls-add internal > +check ovn-nbctl ls-add public > + > +check ovn-nbctl lrp-add lr lr-pub 00:00:01:01:02:03 192.168.100.1/24 > +check ovn-nbctl lsp-add public pub-lr -- set Logical_Switch_Port pub-lr \ > + type=router options:router-port=lr-pub addresses=\"00:00:01:01:02:03\" > + > +check ovn-nbctl lrp-add lr lr-internal 00:00:01:01:02:04 192.168.200.1/24 > +check ovn-nbctl lsp-add internal internal-lr -- set Logical_Switch_Port > internal-lr \ > + type=router options:router-port=lr-internal > addresses=\"00:00:01:01:02:04\" > + > +check ovn-nbctl lsp-add internal server_ipv4 -- lsp-set-addresses > server_ipv4 "unknown" > + > +check ovn-nbctl lsp-add public ln_port \ > + -- lsp-set-addresses ln_port unknown \ > + -- lsp-set-type ln_port localnet \ > + -- lsp-set-options ln_port network_name=phynet > + > +check ovn-nbctl lrp-set-gateway-chassis lr-pub hv1 > + > +check ovn-nbctl lb-add lb1_ipv4 1.1.1.1:80 192.168.200.10:10880 > +check ovn-nbctl lr-lb-add lr lb1_ipv4 > + > +check ovn-nbctl --wait=hv sync > + > +ADD_NAMESPACES(client_ipv4) > +ADD_VETH(client_ipv4, client_ipv4, br-ext, "192.168.100.10/24", > "f0:00:00:01:02:03", \ > + "192.168.100.1") > + > +ADD_NAMESPACES(server_ipv4) > +ADD_VETH(server_ipv4, server_ipv4, br-int, "192.168.200.10/24", > "f0:00:0f:01:02:03", \ > + "192.168.200.1") > + > +NETNS_DAEMONIZE([server_ipv4], [nc -l -k 192.168.200.10 10880], > [serverv4.pid]) > + > +# Checking backend availability. > +NS_CHECK_EXEC([client_ipv4], [nc 1.1.1.1 80 -z], [0], [ignore], [ignore]) > + > +# Changing the gateway to a non-existent one. > +check ovn-nbctl clear logical_router_port lr-pub gateway_chassis > +check ovn-nbctl lrp-set-gateway-chassis lr-pub hv2 > + > +# ovn-controller currently does not recalculate local datapaths > +# when 'ha_chassis_group' change, so we reboot it. 
> +OVS_APP_EXIT_AND_WAIT([ovn-controller]) > +start_daemon ovn-controller > +wait_for_ports_up > + > +# Check public switch not in local datapaths > +AT_CHECK([ovn-appctl -t ovn-controller debug/dump-local-datapaths], [0], [dnl > +Local datapaths: > +Datapath: lr, type: router > +Datapath: internal, type: switch > +]) > + > +AT_CHECK([ovs-ofctl dump-groups br-int | sed -e > 's/table=[[0-9]]*/table=<cleared>/'], [0], [dnl > +NXST_GROUP_DESC reply (xid=0x2): > +]) > + > +check ovn-nbctl set load_balancer lb1_ipv4 options:distributed=true > +check ovn-nbctl set Load_Balancer lb1_ipv4 > ip_port_mappings:192.168.200.10=server_ipv4 > +check ovn-nbctl --wait=hv sync > + > +# Check that external switch has been added to local datapaths on > distributed nodes > +# when 'distributed' option is enabled on load balancer. > +AT_CHECK([ovn-appctl -t ovn-controller debug/dump-local-datapaths], [0], [dnl > +Local datapaths: > +Datapath: lr, type: router > +Datapath: internal, type: switch > +Datapath: public, type: switch > +]) > + > +AT_CHECK([ovs-ofctl dump-groups br-int | sed -e > 's/table=[[0-9]]*/table=<cleared>/'], [0], [dnl > +NXST_GROUP_DESC reply (xid=0x2): > + > group_id=1,type=select,selection_method=dp_hash,bucket=bucket_id:0,weight:100,actions=ct(commit,table=<cleared>,zone=NXM_NX_REG11[[0..15]],nat(dst=192.168.200.10:10880),exec(load:0x1->NXM_NX_CT_MARK[[1]])) > +]) > + > +# Checking backend availability. > +NS_CHECK_EXEC([client_ipv4], [nc 1.1.1.1 80 -z], [0], [ignore], [ignore]) > + > +check ovn-nbctl lb-del lb1_ipv4 > + > +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d > +/connection dropped.*/d"]) > +AT_CLEANUP > +]) > + > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([Distributed Load Balancer: IPv6 case]) > +AT_KEYWORDS([ovnlb]) > + > +# Simple Test for Basic Functionality Verification: > +# client - br-ext - br-int - outside-switch - (distributed port) lr - backend > +# test case: > +# 1. 
Create 2 centralized load balancer(ipv4/ipv6), specifying gateway > chassis for router. > +# 2. Moving gateway to a non-existent chassis. > +# 3. Enable distributed option on load balancer. > +# 4. The distributed load balancer is expected to continue working. > + > +CHECK_CONNTRACK() > +CHECK_CONNTRACK_NAT() > + > +ovn_start > +OVS_TRAFFIC_VSWITCHD_START() > +ADD_BR([br-int]) > +ADD_BR([br-ext]) > + > +check ovs-ofctl add-flow br-ext action=normal > +# Set external-ids in br-int needed for ovn-controller > +check ovs-vsctl \ > + -- set Open_vSwitch . external-ids:system-id=hv1 \ > + -- set Open_vSwitch . > external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ > + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ > + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ > + -- set bridge br-int fail-mode=secure > other-config:disable-in-band=true \ > + -- set Open_vSwitch . external-ids:ovn-bridge-mappings=phynet:br-ext > + > +# Start ovn-controller > +start_daemon ovn-controller > + > +check ovn-nbctl lr-add lr > +check ovn-nbctl ls-add internal > +check ovn-nbctl ls-add public > + > +check ovn-nbctl lrp-add lr lr-pub 00:00:01:01:02:03 > 2001:db8:abcd:0002::bad/64 > +check ovn-nbctl lsp-add public pub-lr -- set Logical_Switch_Port pub-lr \ > + type=router options:router-port=lr-pub addresses=\"00:00:01:01:02:03\" > + > +check ovn-nbctl lrp-add lr lr-internal 00:00:01:01:02:04 > 2001:db8:abcd:0001::c0fe/64 > +check ovn-nbctl lsp-add internal internal-lr -- set Logical_Switch_Port > internal-lr \ > + type=router options:router-port=lr-internal > addresses=\"00:00:01:01:02:04\" > + > +check ovn-nbctl lsp-add internal server_ipv6 -- lsp-set-addresses > server_ipv6 "unknown" > + > +check ovn-nbctl lsp-add public ln_port \ > + -- lsp-set-addresses ln_port unknown \ > + -- lsp-set-type ln_port localnet \ > + -- lsp-set-options ln_port network_name=phynet > + > +check ovn-nbctl lrp-set-gateway-chassis lr-pub hv1 > + > +check ovn-nbctl lb-add lb1_ipv6 
[[2000::1]]:80 [[2001:db8:abcd:1::2]]:10882 > +check ovn-nbctl lr-lb-add lr lb1_ipv6 > + > +check ovn-nbctl --wait=hv sync > + > +ADD_NAMESPACES(client_ipv6) > +ADD_VETH(client_ipv6, client_ipv6, br-ext, "2001:db8:abcd:2::f00d/64", > "f0:00:00:01:02:06", \ > + "2001:db8:abcd:0002::bad") > + > +ADD_NAMESPACES(server_ipv6) > +ADD_VETH(server_ipv6, server_ipv6, br-int, "2001:db8:abcd:1::2/64", > "f0:00:0f:01:02:04", \ > + "2001:db8:abcd:1::c0fe") > + > +# Wait for IPv6 address to be ready > +sleep 5 > + > +NETNS_DAEMONIZE([server_ipv6], [ncat -6 -l -k 2001:db8:abcd:1::2 10882], > [serverv6.pid]) > + > +# Checking backend availability. > +NS_CHECK_EXEC([client_ipv6], [nc -6 2000::1 80 -z], [0], [ignore], [ignore]) > + > +# Changing the gateway to a non-existent one. > +check ovn-nbctl clear logical_router_port lr-pub gateway_chassis > +check ovn-nbctl lrp-set-gateway-chassis lr-pub hv2 > + > +# ovn-controller currently does not recalculate local datapaths > +# when 'ha_chassis_group' change, so we reboot it. > +OVS_APP_EXIT_AND_WAIT([ovn-controller]) > +start_daemon ovn-controller > +wait_for_ports_up > + > +# Check public switch not in local datapaths > +AT_CHECK([ovn-appctl -t ovn-controller debug/dump-local-datapaths], [0], [dnl > +Local datapaths: > +Datapath: lr, type: router > +Datapath: internal, type: switch > +]) > + > +AT_CHECK([ovs-ofctl dump-groups br-int | sed -e > 's/table=[[0-9]]*/table=<cleared>/'], [0], [dnl > +NXST_GROUP_DESC reply (xid=0x2): > +]) > + > +check ovn-nbctl set load_balancer lb1_ipv6 options:distributed=true > +check ovn-nbctl set Load_Balancer lb1_ipv6 > ip_port_mappings:\"[[2001:db8:abcd:1::2]]\"=\"server_ipv6\" > +check ovn-nbctl --wait=hv sync > + > +# Check that external switch has been added to local datapaths on > distrubuted nodes > +# when 'distributed' option is enabled on load balancer. 
> +AT_CHECK([ovn-appctl -t ovn-controller debug/dump-local-datapaths], [0], [dnl > +Local datapaths: > +Datapath: lr, type: router > +Datapath: internal, type: switch > +Datapath: public, type: switch > +]) > + > +AT_CHECK([ovs-ofctl dump-groups br-int | sed -e > 's/table=[[0-9]]*/table=<cleared>/'], [0], [dnl > +NXST_GROUP_DESC reply (xid=0x2): > + > group_id=1,type=select,selection_method=dp_hash,bucket=bucket_id:0,weight:100,actions=ct(commit,table=<cleared>,zone=NXM_NX_REG11[[0..15]],nat(dst=[[2001:db8:abcd:1::2]]:10882),exec(load:0x1->NXM_NX_CT_MARK[[1]])) > +]) > + > +# Checking backend availability. > +NS_CHECK_EXEC([client_ipv6], [nc -6 2000::1 80 -z], [0], [ignore], [ignore]) > + > +check ovn-nbctl lb-del lb1_ipv6 > + > +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d > +/connection dropped.*/d"]) > +AT_CLEANUP > +]) > -- > 2.48.1 > > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
