1) Added a new option "distributed" for load balancers.
With this option enabled, load balancing is performed in a distributed
fashion across compute nodes, with each node balancing only to its local
backends.
2) If the load balancer is attached to a router with a distributed gateway
port (DGP), the router will no longer be centralized.
3) For such load balancers, ip_port_mappings must be specified so that the
logical flows are generated correctly.
4) Balancing is performed by a new action, 'ct_lb_mark_local'.
Example:
Load Balancer: lb1 with VIP 10.255.0.1 and distributed option enabled.
Fabric is configured with a static ECMP route for 10.255.0.1/32:
nexthop via ip_host1 weight 1 (hosts backend1)
nexthop via ip_host2 weight 1 (hosts backend2)
nexthop via ip_host3 weight 2 (hosts backend3 and backend4)
As part of testing, the following distribution of requests across the
backends was obtained:
[root@dev11 ~]# for i in $(seq 5000); do curl http://10.255.0.1:80 2>/dev/null ; echo ; done | awk '{print $2}' | sort | uniq -c
1265 "backend 4",
1260 "backend 3",
1224 "backend 2",
1251 "backend 1",
Thus, with ECMP balancing, requests are distributed approximately evenly
across the backends.
Suggested-by: Vladislav Odintsov <[email protected]>
Signed-off-by: Alexandra Rukomoinikova <[email protected]>
---
v1 --> v2: split tests for ipv4/6 cases
---
northd/en-lb-data.c | 8 ++
northd/en-lb-data.h | 3 +
northd/en-lr-stateful.c | 4 +
northd/en-lr-stateful.h | 2 +
northd/lb.c | 93 ++++++++-------
northd/lb.h | 4 +
northd/northd.c | 169 ++++++++++++++++++++-------
northd/northd.h | 5 +
ovn-nb.xml | 13 ++-
tests/ovn-northd.at | 247 ++++++++++++++++++++++++++++++++++++++++
tests/system-ovn.at | 245 +++++++++++++++++++++++++++++++++++++++
11 files changed, 709 insertions(+), 84 deletions(-)
diff --git a/northd/en-lb-data.c b/northd/en-lb-data.c
index 6d52d465e..6547a961f 100644
--- a/northd/en-lb-data.c
+++ b/northd/en-lb-data.c
@@ -166,6 +166,7 @@ lb_data_load_balancer_handler(struct engine_node *node,
void *data)
add_crupdated_lb_to_tracked_data(lb, trk_lb_data,
lb->health_checks);
trk_lb_data->has_routable_lb |= lb->routable;
+ trk_lb_data->has_distributed_lb |= lb->is_distributed;
continue;
}
@@ -180,6 +181,7 @@ lb_data_load_balancer_handler(struct engine_node *node,
void *data)
add_deleted_lb_to_tracked_data(lb, trk_lb_data,
lb->health_checks);
trk_lb_data->has_routable_lb |= lb->routable;
+ trk_lb_data->has_distributed_lb |= lb->is_distributed;
} else {
/* Load balancer updated. */
bool health_checks = lb->health_checks;
@@ -189,11 +191,13 @@ lb_data_load_balancer_handler(struct engine_node *node,
void *data)
sset_swap(&lb->ips_v6, &old_ips_v6);
enum lb_neighbor_responder_mode neigh_mode = lb->neigh_mode;
bool routable = lb->routable;
+ bool distributed_mode = lb->is_distributed;
ovn_northd_lb_reinit(lb, tracked_lb);
health_checks |= lb->health_checks;
struct crupdated_lb *clb = add_crupdated_lb_to_tracked_data(
lb, trk_lb_data, health_checks);
trk_lb_data->has_routable_lb |= lb->routable;
+ trk_lb_data->has_distributed_lb |= lb->is_distributed;
/* Determine the inserted and deleted vips and store them in
* the tracked data. */
@@ -226,6 +230,10 @@ lb_data_load_balancer_handler(struct engine_node *node,
void *data)
/* If neigh_mode is updated trigger a full recompute. */
return EN_UNHANDLED;
}
+ if (distributed_mode != lb->is_distributed) {
+ /* If distributed mode is updated trigger a full recompute. */
+ return EN_UNHANDLED;
+ }
}
}
diff --git a/northd/en-lb-data.h b/northd/en-lb-data.h
index 1da087656..90e85b8c4 100644
--- a/northd/en-lb-data.h
+++ b/northd/en-lb-data.h
@@ -82,6 +82,9 @@ struct tracked_lb_data {
/* Indicates if any lb (in the tracked data) has 'routable' flag set. */
bool has_routable_lb;
+
+ /* Indicates if any lb (in the tracked data) has 'distributed' flag set. */
+ bool has_distributed_lb;
};
/* Datapath (logical switch) to lb/lbgrp association data. */
diff --git a/northd/en-lr-stateful.c b/northd/en-lr-stateful.c
index 5eec1e11a..55788c06c 100644
--- a/northd/en-lr-stateful.c
+++ b/northd/en-lr-stateful.c
@@ -325,7 +325,9 @@ lr_stateful_lb_data_handler(struct engine_node *node, void
*data_)
const struct ovn_datapath *od =
ovn_datapaths_find_by_index(input_data.lr_datapaths,
lr_stateful_rec->lr_index);
+
lr_stateful_rec->has_lb_vip = od_has_lb_vip(od);
+ lr_stateful_rec->has_distributed_lb = od->is_distributed;
}
return EN_HANDLED_UPDATED;
@@ -527,7 +529,9 @@ lr_stateful_record_create(struct lr_stateful_table *table,
if (nbr->n_nat) {
lr_stateful_rebuild_vip_nats(lr_stateful_rec);
}
+
lr_stateful_rec->has_lb_vip = od_has_lb_vip(od);
+ lr_stateful_rec->has_distributed_lb = od->is_distributed;
hmap_insert(&table->entries, &lr_stateful_rec->key_node,
uuid_hash(&lr_stateful_rec->nbr_uuid));
diff --git a/northd/en-lr-stateful.h b/northd/en-lr-stateful.h
index 146f768c3..3b0c54521 100644
--- a/northd/en-lr-stateful.h
+++ b/northd/en-lr-stateful.h
@@ -59,6 +59,8 @@ struct lr_stateful_record {
bool has_lb_vip;
+ bool has_distributed_lb;
+
/* Load Balancer vIPs relevant for this datapath. */
struct ovn_lb_ip_set *lb_ips;
diff --git a/northd/lb.c b/northd/lb.c
index 919557ec4..7db2ba1fd 100644
--- a/northd/lb.c
+++ b/northd/lb.c
@@ -85,12 +85,12 @@ ovn_lb_ip_set_clone(struct ovn_lb_ip_set *lb_ip_set)
return clone;
}
-static
-void ovn_northd_lb_vip_init(struct ovn_northd_lb_vip *lb_vip_nb,
- const struct ovn_lb_vip *lb_vip,
- const struct nbrec_load_balancer *nbrec_lb,
- const char *vip_port_str, const char *backend_ips,
- bool template)
+static void
+ovn_northd_lb_vip_init(struct ovn_northd_lb_vip *lb_vip_nb,
+ const struct ovn_lb_vip *lb_vip,
+ const struct nbrec_load_balancer *nbrec_lb,
+ const char *vip_port_str, const char *backend_ips,
+ bool template)
{
lb_vip_nb->backend_ips = xstrdup(backend_ips);
lb_vip_nb->n_backends = vector_len(&lb_vip->backends);
@@ -101,19 +101,24 @@ void ovn_northd_lb_vip_init(struct ovn_northd_lb_vip
*lb_vip_nb,
}
/*
- * Initializes health check configuration for load balancer VIP
- * backends. Parses the ip_port_mappings in the format :
- * "ip:logical_port:src_ip[:az_name]".
+ * Parses ip_port_mappings in the format :
+ * "ip:logical_port[:src_ip][:az_name]".
+ * The src_ip parameter is optional when distributed mode is enabled
+ * and no health checks are configured.
* If az_name is present and non-empty, it indicates this is a
* remote service monitor (backend is in another availability zone),
* it should be propogated to another AZ by interconnection processing.
+ * This configuration is required for health checks and for distributed
+ * operation of the load balancer.
*/
static void
-ovn_lb_vip_backends_health_check_init(const struct ovn_northd_lb *lb,
- const struct ovn_lb_vip *lb_vip,
- struct ovn_northd_lb_vip *lb_vip_nb)
+ovn_lb_vip_backends_ip_port_mappings_init(const struct ovn_northd_lb *lb,
+ const struct ovn_lb_vip *lb_vip,
+ struct ovn_northd_lb_vip *lb_vip_nb)
{
struct ds key = DS_EMPTY_INITIALIZER;
+ bool allow_without_src_ip = lb->is_distributed
+ && !lb_vip_nb->lb_health_check;
for (size_t j = 0; j < vector_len(&lb_vip->backends); j++) {
const struct ovn_lb_backend *backend =
@@ -127,16 +132,26 @@ ovn_lb_vip_backends_health_check_init(const struct
ovn_northd_lb *lb,
continue;
}
- char *svc_mon_src_ip = NULL;
- char *az_name = NULL;
+ struct ovn_northd_lb_backend *backend_nb = NULL;
+ char *port_name = NULL, *az_name = NULL, *first_colon = NULL;
+ char *svc_mon_src_ip = NULL, *src_ip = NULL;
bool is_remote = false;
- char *port_name = xstrdup(s);
- char *src_ip = NULL;
- char *first_colon = strchr(port_name, ':');
- if (!first_colon) {
- free(port_name);
- continue;
+ port_name = xstrdup(s);
+ first_colon = strchr(port_name, ':');
+ if (!first_colon && allow_without_src_ip) {
+ if (!*port_name) {
+ VLOG_WARN("Empty port name in distributed mode for IP %s",
+ ds_cstr(&key));
+ goto cleanup;
+ }
+ src_ip = NULL;
+ az_name = NULL;
+ is_remote = false;
+ goto init_backend;
+ } else if (!first_colon) {
+ VLOG_WARN("Expected ':' separator for: %s", port_name);
+ goto cleanup;
}
*first_colon = '\0';
@@ -145,8 +160,7 @@ ovn_lb_vip_backends_health_check_init(const struct
ovn_northd_lb *lb,
char *ip_end = strchr(first_colon + 2, ']');
if (!ip_end) {
VLOG_WARN("Malformed IPv6 address in backend %s", s);
- free(port_name);
- continue;
+ goto cleanup;
}
src_ip = first_colon + 2;
@@ -157,8 +171,7 @@ ovn_lb_vip_backends_health_check_init(const struct
ovn_northd_lb *lb,
if (!*az_name) {
VLOG_WARN("Empty AZ name specified for backend %s",
port_name);
- free(port_name);
- continue;
+ goto cleanup;
}
is_remote = true;
}
@@ -172,32 +185,33 @@ ovn_lb_vip_backends_health_check_init(const struct
ovn_northd_lb *lb,
if (!*az_name) {
VLOG_WARN("Empty AZ name specified for backend %s",
port_name);
- free(port_name);
- continue;
+ goto cleanup;
}
- is_remote = true;
+ is_remote = true;
}
}
struct sockaddr_storage svc_mon_src_addr;
if (!src_ip || !inet_parse_address(src_ip, &svc_mon_src_addr)) {
VLOG_WARN("Invalid svc mon src IP %s", src_ip ? src_ip : "NULL");
+ goto cleanup;
} else {
struct ds src_ip_s = DS_EMPTY_INITIALIZER;
ss_format_address_nobracks(&svc_mon_src_addr, &src_ip_s);
svc_mon_src_ip = ds_steal_cstr(&src_ip_s);
}
- if (svc_mon_src_ip) {
- struct ovn_northd_lb_backend *backend_nb =
- &lb_vip_nb->backends_nb[j];
- backend_nb->health_check = true;
- backend_nb->logical_port = xstrdup(port_name);
- backend_nb->svc_mon_src_ip = svc_mon_src_ip;
- backend_nb->az_name = is_remote ? xstrdup(az_name) : NULL;
- backend_nb->remote_backend = is_remote;
- }
+init_backend:
+ backend_nb = &lb_vip_nb->backends_nb[j];
+ backend_nb->health_check = lb_vip_nb->lb_health_check;
+ backend_nb->logical_port = xstrdup(port_name);
+ backend_nb->svc_mon_src_ip = svc_mon_src_ip;
+ backend_nb->az_name = is_remote ? xstrdup(az_name) : NULL;
+ backend_nb->remote_backend = is_remote;
+ backend_nb->distributed_backend = lb->is_distributed ? true : false;
+cleanup:
free(port_name);
+ continue;
}
ds_destroy(&key);
@@ -364,6 +378,9 @@ ovn_northd_lb_init(struct ovn_northd_lb *lb,
lb->hairpin_snat_ip = xstrdup(snat_ip);
}
+ lb->is_distributed = smap_get_bool(&nbrec_lb->options, "distributed",
+ false);
+
sset_init(&lb->ips_v4);
sset_init(&lb->ips_v6);
struct smap_node *node;
@@ -403,8 +420,8 @@ ovn_northd_lb_init(struct ovn_northd_lb *lb,
}
n_vips++;
- if (lb_vip_nb->lb_health_check) {
- ovn_lb_vip_backends_health_check_init(lb, lb_vip, lb_vip_nb);
+ if (lb_vip_nb->lb_health_check || lb->is_distributed) {
+ ovn_lb_vip_backends_ip_port_mappings_init(lb, lb_vip, lb_vip_nb);
}
}
diff --git a/northd/lb.h b/northd/lb.h
index 43a8a1850..bd7fe641c 100644
--- a/northd/lb.h
+++ b/northd/lb.h
@@ -74,6 +74,9 @@ struct ovn_northd_lb {
/* Indicates if the load balancer has health checks configured. */
bool health_checks;
+ /* Indicates if distributed option is enabled for load balancer. */
+ bool is_distributed;
+
char *hairpin_snat_ip;
};
@@ -90,6 +93,7 @@ struct ovn_northd_lb_backend {
bool health_check;
/* Set to true if port does not locate in local AZ. */
bool remote_backend;
+ bool distributed_backend;
/* Logical port to which the ip belong to. */
char *logical_port;
/* Source IP address to be used for service monitoring. */
diff --git a/northd/northd.c b/northd/northd.c
index 8b5753f16..3551680b9 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -554,6 +554,7 @@ ovn_datapath_create(struct hmap *datapaths, const struct
uuid *key,
od->localnet_ports = VECTOR_EMPTY_INITIALIZER(struct ovn_port *);
od->lb_with_stateless_mode = false;
od->ipam_info_initialized = false;
+ od->is_distributed = false;
od->tunnel_key = sdp->sb_dp->tunnel_key;
init_mcast_info_for_datapath(od);
od->datapath_lflows = lflow_ref_create();
@@ -3283,6 +3284,54 @@ ovn_lb_svc_create(struct ovsdb_idl_txn *ovnsb_txn,
}
}
+static bool
+is_backend_available(const struct ovn_northd_lb *lb,
+ const struct ovn_lb_backend *backend,
+ const struct ovn_northd_lb_backend *backend_nb,
+ const struct svc_monitors_map_data *svc_mons_data)
+{
+ const char *protocol = lb->nlb->protocol;
+ if (!protocol || !protocol[0]) {
+ protocol = "tcp";
+ }
+
+ struct service_monitor_info *mon_info =
+ get_service_mon(svc_mons_data->local_svc_monitors_map,
+ svc_mons_data->ic_learned_svc_monitors_map,
+ backend->ip_str,
+ backend_nb->logical_port,
+ backend->port,
+ protocol);
+
+ if (!mon_info) {
+ return false;
+ }
+
+ ovs_assert(mon_info->sbrec_mon);
+
+ if (mon_info->sbrec_mon->status &&
+ strcmp(mon_info->sbrec_mon->status, "online")) {
+ return false;
+ }
+
+ return true;
+}
+
+static inline void
+append_lb_backend_to_action(const struct ovn_lb_backend *backend,
+ const struct ovn_northd_lb_backend *backend_nb,
+ bool distributed_mode,
+ struct ds *action)
+{
+ bool ipv6 = !IN6_IS_ADDR_V4MAPPED(&backend->ip);
+
+ if (distributed_mode) {
+ ds_put_format(action, "\"%s\":", backend_nb->logical_port);
+ }
+ ds_put_format(action, ipv6 ? "[%s]:%"PRIu16"," : "%s:%"PRIu16",",
+ backend->ip_str, backend->port);
+}
+
static bool
build_lb_vip_actions(const struct ovn_northd_lb *lb,
const struct ovn_lb_vip *lb_vip,
@@ -3308,9 +3357,11 @@ build_lb_vip_actions(const struct ovn_northd_lb *lb,
}
}
- if (lb_vip_nb->lb_health_check) {
- ds_put_cstr(action, "ct_lb_mark(backends=");
+ ds_put_format(action, "%s", lb->is_distributed
+ ? "ct_lb_mark_local(backends="
+ : "ct_lb_mark(backends=");
+ if (lb_vip_nb->lb_health_check || lb->is_distributed) {
size_t i = 0;
size_t n_active_backends = 0;
const struct ovn_lb_backend *backend;
@@ -3318,45 +3369,38 @@ build_lb_vip_actions(const struct ovn_northd_lb *lb,
struct ovn_northd_lb_backend *backend_nb =
&lb_vip_nb->backends_nb[i++];
- if (!backend_nb->health_check) {
- continue;
- }
-
- const char *protocol = lb->nlb->protocol;
- if (!protocol || !protocol[0]) {
- protocol = "tcp";
+ /* Skip backends whose ip_port_mappings entry is missing or
+ * invalid for the enabled mode (health check / distributed). */
+ if (lb_vip_nb->lb_health_check &&
+ !backend_nb->health_check) {
+ continue;
}
- struct service_monitor_info *mon_info =
- get_service_mon(svc_mons_data->local_svc_monitors_map,
- svc_mons_data->ic_learned_svc_monitors_map,
- backend->ip_str,
- backend_nb->logical_port,
- backend->port,
- protocol);
-
- if (!mon_info) {
+ if (lb->is_distributed &&
+ !backend_nb->distributed_backend ) {
continue;
}
- ovs_assert(mon_info->sbrec_mon);
- if (mon_info->sbrec_mon->status &&
- strcmp(mon_info->sbrec_mon->status, "online")) {
+ if (backend_nb->health_check &&
+ !is_backend_available(lb,
+ backend,
+ backend_nb,
+ svc_mons_data)) {
continue;
}
n_active_backends++;
- bool ipv6 = !IN6_IS_ADDR_V4MAPPED(&backend->ip);
- ds_put_format(action, ipv6 ? "[%s]:%"PRIu16"," : "%s:%"PRIu16",",
- backend->ip_str, backend->port);
+ append_lb_backend_to_action(backend,
+ backend_nb,
+ backend_nb->distributed_backend,
+ action);
}
ds_chomp(action, ',');
drop = !n_active_backends && !lb_vip->empty_backend_rej;
reject = !n_active_backends && lb_vip->empty_backend_rej;
} else {
- ds_put_format(action, "ct_lb_mark(backends=%s",
- lb_vip_nb->backend_ips);
+ ds_put_format(action, "%s", lb_vip_nb->backend_ips);
}
if (reject) {
@@ -3393,6 +3437,19 @@ build_lb_vip_actions(const struct ovn_northd_lb *lb,
return reject;
}
+static inline void
+handle_od_lb_datapath_modes(struct ovn_datapath *od,
+ struct ovn_lb_datapaths *lb_dps)
+{
+ if (od->lb_with_stateless_mode) {
+ hmapx_add(&lb_dps->ls_lb_with_stateless_mode, od);
+ }
+
+ if (lb_dps->lb->is_distributed) {
+ od->is_distributed = true;
+ }
+}
+
static void
build_lb_datapaths(const struct hmap *lbs, const struct hmap *lb_groups,
struct ovn_datapaths *ls_datapaths,
@@ -3435,9 +3492,7 @@ build_lb_datapaths(const struct hmap *lbs, const struct
hmap *lb_groups,
lb_dps = ovn_lb_datapaths_find(lb_datapaths_map, lb_uuid);
ovs_assert(lb_dps);
ovn_lb_datapaths_add_ls(lb_dps, 1, &od, ods_size(ls_datapaths));
- if (od->lb_with_stateless_mode) {
- hmapx_add(&lb_dps->ls_lb_with_stateless_mode, od);
- }
+ handle_od_lb_datapath_modes(od, lb_dps);
}
for (size_t i = 0; i < od->nbs->n_load_balancer_group; i++) {
@@ -3470,6 +3525,7 @@ build_lb_datapaths(const struct hmap *lbs, const struct
hmap *lb_groups,
&od->nbr->load_balancer[i]->header_.uuid;
lb_dps = ovn_lb_datapaths_find(lb_datapaths_map, lb_uuid);
ovs_assert(lb_dps);
+ handle_od_lb_datapath_modes(od, lb_dps);
ovn_lb_datapaths_add_lr(lb_dps, 1, &od, ods_size(lr_datapaths));
}
}
@@ -3824,6 +3880,7 @@ sync_pb_for_lrp(struct ovn_port *op,
bool always_redirect =
!lr_stateful_rec->lrnat_rec->has_distributed_nat &&
+ !lr_stateful_rec->has_distributed_lb &&
!l3dgw_port_has_associated_vtep_lports(op->primary_port);
const char *redirect_type = smap_get(&op->nbrp->options,
@@ -5404,10 +5461,7 @@ northd_handle_lb_data_changes(struct tracked_lb_data
*trk_lb_data,
lb_dps = ovn_lb_datapaths_find(lb_datapaths_map, &uuidnode->uuid);
ovs_assert(lb_dps);
ovn_lb_datapaths_add_ls(lb_dps, 1, &od, ods_size(ls_datapaths));
-
- if (od->lb_with_stateless_mode) {
- hmapx_add(&lb_dps->ls_lb_with_stateless_mode, od);
- }
+ handle_od_lb_datapath_modes(od, lb_dps);
/* Add the lb to the northd tracked data. */
hmapx_add(&nd_changes->trk_lbs.crupdated, lb_dps);
@@ -5446,6 +5500,7 @@ northd_handle_lb_data_changes(struct tracked_lb_data
*trk_lb_data,
lb_dps = ovn_lb_datapaths_find(lb_datapaths_map, &uuidnode->uuid);
ovs_assert(lb_dps);
ovn_lb_datapaths_add_lr(lb_dps, 1, &od, ods_size(lr_datapaths));
+ handle_od_lb_datapath_modes(od, lb_dps);
/* Add the lb to the northd tracked data. */
hmapx_add(&nd_changes->trk_lbs.crupdated, lb_dps);
@@ -10688,8 +10743,13 @@ build_lswitch_ip_unicast_lookup(struct ovn_port *op,
: debug_drop_action();
if (lsp_is_router(op->nbsp) && op->peer && op->peer->nbrp) {
+ /* Distributed gateway ports are centralized by default. They
+ * operate in distributed mode only when a distributed load
+ * balancer is attached to their router. */
+ bool peer_lrp_is_centralized = !op->peer->od->is_distributed;
+
/* For ports connected to logical routers add flows to bypass the
- * broadcast flooding of ARP/ND requests in table 19. We direct the
+ * broadcast flooding of ARP/ND requests in table 22. We direct the
* requests only to the router port that owns the IP address.
*/
build_lswitch_rport_arp_req_flows(op->peer, op->od, op, lflows,
@@ -10704,7 +10764,8 @@ build_lswitch_ip_unicast_lookup(struct ovn_port *op,
ds_put_format(match, "eth.dst == %s", op->peer->lrp_networks.ea_s);
}
- if (!vector_is_empty(&op->peer->od->l3dgw_ports) &&
+ if (peer_lrp_is_centralized &&
+ !vector_is_empty(&op->peer->od->l3dgw_ports) &&
!vector_is_empty(&op->od->localnet_ports)) {
add_lrp_chassis_resident_check(op->peer, match);
} else if (op->cr_port) {
@@ -12438,6 +12499,14 @@ build_distr_lrouter_nat_flows_for_lb(struct
lrouter_nat_lb_flows_ctx *ctx,
size_t new_match_len = ctx->new_match->length;
size_t undnat_match_len = ctx->undnat_match->length;
+ bool lb_is_centralized = !ctx->lb->is_distributed;
+
+ /* If the load balancer is distributed, then the response traffic
+ * must be returned through the distributed port. */
+ const char *gw_outport = lb_is_centralized ?
+ dgp->cr_port->json_key :
+ dgp->json_key;
+
/* (NOTE) dnat_action: Add the first LB backend IP as a destination
* action of the lr_in_dnat NAT rule. Including the backend IP is useful
* for accepting packets coming from a chassis that does not have
@@ -12472,8 +12541,9 @@ build_distr_lrouter_nat_flows_for_lb(struct
lrouter_nat_lb_flows_ctx *ctx,
meter = copp_meter_get(COPP_REJECT, od->nbr->copp, ctx->meter_groups);
}
- if (!vector_is_empty(&ctx->lb_vip->backends) ||
- !ctx->lb_vip->empty_backend_rej) {
+ if (lb_is_centralized
+ && (!vector_is_empty(&ctx->lb_vip->backends) ||
+ !ctx->lb_vip->empty_backend_rej)) {
ds_put_format(ctx->new_match, " && is_chassis_resident(%s)",
dgp->cr_port->json_key);
}
@@ -12520,8 +12590,8 @@ build_distr_lrouter_nat_flows_for_lb(struct
lrouter_nat_lb_flows_ctx *ctx,
*/
ds_put_format(ctx->undnat_match, ") && outport == %s", dgp->json_key);
ds_clear(ctx->gw_redir_action);
- ds_put_format(ctx->gw_redir_action, "outport = %s; next;",
- dgp->cr_port->json_key);
+ ds_put_format(ctx->gw_redir_action,
+ "outport = %s; next;", gw_outport);
ovn_lflow_add_with_hint(ctx->lflows, od, S_ROUTER_IN_GW_REDIRECT,
200, ds_cstr(ctx->undnat_match),
@@ -12530,9 +12600,14 @@ build_distr_lrouter_nat_flows_for_lb(struct
lrouter_nat_lb_flows_ctx *ctx,
lflow_ref);
ds_truncate(ctx->undnat_match, undnat_match_len);
- ds_put_format(ctx->undnat_match, ") && (inport == %s || outport == %s)"
- " && is_chassis_resident(%s)", dgp->json_key, dgp->json_key,
- dgp->cr_port->json_key);
+ ds_put_format(ctx->undnat_match, ") && (inport == %s || outport == %s)",
+ dgp->json_key, dgp->json_key);
+
+ if (lb_is_centralized) {
+ ds_put_format(ctx->undnat_match, " && is_chassis_resident(%s)",
+ dgp->cr_port->json_key);
+ }
+
/* Use the LB protocol as matching criteria for out undnat and snat when
* creating LBs with stateless NAT. */
if (stateless_nat) {
@@ -13855,6 +13930,10 @@ build_gateway_mtu_flow(struct lflow_table *lflows,
struct ovn_port *op,
static bool
consider_l3dgw_port_is_centralized(struct ovn_port *op)
{
+ if (op->od->is_distributed) {
+ return false;
+ }
+
if (l3dgw_port_has_associated_vtep_lports(op)) {
return false;
}
@@ -16084,7 +16163,7 @@ build_ipv6_input_flows_for_lrouter_port(
* router's own IP address. */
for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
ds_clear(match);
- if (lrp_is_l3dgw(op)) {
+ if (lrp_is_l3dgw(op) && !op->od->is_distributed) {
/* Traffic with eth.src = l3dgw_port->lrp_networks.ea_s
* should only be sent from the gateway chassi, so that
* upstream MAC learning points to the gateway chassis.
@@ -16361,13 +16440,15 @@ build_lrouter_ipv4_ip_input(struct ovn_port *op,
/* ARP reply. These flows reply to ARP requests for the router's own
* IP address. */
+ bool od_distributed = op->od->is_distributed;
for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
ds_clear(match);
ds_put_format(match, "arp.spa == %s/%u",
op->lrp_networks.ipv4_addrs[i].network_s,
op->lrp_networks.ipv4_addrs[i].plen);
- if (!vector_is_empty(&op->od->l3dgw_ports) && op->peer
+ if (!od_distributed &&
+ !vector_is_empty(&op->od->l3dgw_ports) && op->peer
&& !vector_is_empty(&op->peer->od->localnet_ports)) {
add_lrp_chassis_resident_check(op, match);
}
diff --git a/northd/northd.h b/northd/northd.h
index 32134d36e..b87855310 100644
--- a/northd/northd.h
+++ b/northd/northd.h
@@ -450,6 +450,11 @@ struct ovn_datapath {
/* Indicates that the LS has valid vni associated with it. */
bool has_evpn_vni;
+ /* True if the datapath has distributed dependencies. Currently this
+ * is set only when a load balancer with the 'distributed' option
+ * enabled is attached to the datapath. */
+ bool is_distributed;
+
/* OVN northd only needs to know about logical router gateway ports for
* NAT/LB on a distributed router. The "distributed gateway ports" are
* populated only when there is a gateway chassis or ha chassis group
diff --git a/ovn-nb.xml b/ovn-nb.xml
index b5fe44e53..c6cd6dccd 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -2378,13 +2378,15 @@
<p>
Maps from endpoint IP to a colon-separated pair of logical port name
and source IP,
- e.g. <code><var>port_name</var>:<var>sourc_ip</var></code> for IPv4.
+ e.g. <code><var>port_name</var>:<var>source_ip</var></code> for IPv4.
Health checks are sent to this port with the specified source IP.
For IPv6 square brackets must be used around IP address, e.g:
- <code><var>port_name</var>:<var>[sourc_ip]</var></code>
+ <code><var>port_name</var>:<var>[source_ip]</var></code>
Remote endpoint:
Specify :target_zone_name at the end of the above syntax to create
remote health checks in a specific zone.
+ For distributed load balancers, ip_port_mappings is required.
+ If no health checks are configured, source_ip is optional.
</p>
<p>
@@ -2587,6 +2589,13 @@ or
traffic may be dropped in scenarios where we have different chassis
for each DGP. This option is set to <code>false</code> by default.
</column>
+
+ <column name="options" key="distributed">
+ Enabling this option distributes load balancing across compute nodes,
+ where traffic is routed only to local backends. To ensure proper
+ operation, you must configure <ref column="ip_port_mappings"/> first.
+ </column>
+
</group>
</table>
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 15bc82d58..5d6ea75a6 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -17872,6 +17872,9 @@ ovn_start
# ip_port_mappings syntax: ip:lport_name:src_ip:<az_name>(for remote lports)
check ovn-nbctl ls-add ls1
+check ovn-nbctl lr-add lr1
+
+ovn-appctl -t ovn-northd vlog/disable-rate-limit
check ovn-nbctl lb-add lb1_ipv4 1.1.1.1:80
192.168.0.1:10880,192.168.0.2:10880,192.168.0.3:10880
AT_CHECK([ovn-nbctl --wait=sb \
@@ -17941,6 +17944,47 @@ check ovn-nbctl set load_balancer lb1_ipv4
ip_port_mappings:192.168.0.1=lport1:1
check_row_count sb:Service_Monitor 0
OVS_WAIT_UNTIL([grep "Empty AZ name specified" northd/ovn-northd.log])
+
+check ovn-nbctl lb-del lb1_ipv4
+
+# Check correct setup of distributed load balancers.
+echo > northd/ovn-northd.log
+check ovn-nbctl lb-add lb_distubuted 1.1.1.1:80
192.168.0.1:10880,192.168.0.2:10880
+check ovn-nbctl lr-lb-add lr1 lb_distubuted
+check ovn-nbctl set load_balancer lb_distubuted options:distributed=true
+
+# Check that load balancer does not work in a distributed mode - there is no
ip_port_mappings setting
+ovn-sbctl lflow-list lr1 > lr1_lflow
+AT_CHECK([cat lr1_lflow | grep lr_in_dnat | grep priority=120 |
ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel &&
ip4 && ip4.dst == 1.1.1.1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80),
action=(drop;)
+])
+
+# Check that the load balancer has only one backend available since only
one backend has ip_port_mappings
+check ovn-nbctl set load_balancer lb_distubuted
ip_port_mappings:192.168.0.1=lport1
+ovn-sbctl lflow-list lr1 > lr1_lflow
+AT_CHECK([cat lr1_lflow | grep lr_in_dnat | grep priority=120 |
ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel &&
ip4 && ip4.dst == 1.1.1.1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80),
action=(ct_lb_mark_local(backends="lport1":192.168.0.1:10880);)
+])
+
+check ovn-nbctl set load_balancer lb_distubuted
ip_port_mappings:192.168.0.2=lport2
+ovn-sbctl lflow-list lr1 > lr1_lflow
+AT_CHECK([cat lr1_lflow | grep lr_in_dnat | grep priority=120 |
ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel &&
ip4 && ip4.dst == 1.1.1.1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80),
action=(ct_lb_mark_local(backends="lport1":192.168.0.1:10880,"lport2":192.168.0.2:10880);)
+])
+
+# Check that when a health check is configured, ip_port_mappings must include src_ip.
+AT_CHECK([ovn-nbctl --wait=sb \
+ -- --id=@hc create Load_Balancer_Health_Check vip="1.1.1.1\:80" \
+ options:failure_count=100 \
+ -- add Load_Balancer lb_distubuted health_check @hc | uuidfilt],
[0], [<0>
+])
+
+ovn-sbctl lflow-list lr1 > lr1_lflow
+OVS_WAIT_UNTIL([grep "Expected ':' separator for:" northd/ovn-northd.log])
+AT_CHECK([cat lr1_lflow | grep lr_in_dnat | grep priority=120 |
ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel &&
ip4 && ip4.dst == 1.1.1.1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80),
action=(drop;)
+])
+
AT_CLEANUP
])
@@ -18452,6 +18496,209 @@ AT_CHECK(
AT_CLEANUP
])
+OVN_FOR_EACH_NORTHD_NO_HV([
+AT_SETUP([Distributed lb: logical-flow test - IPv4/IPv6 case])
+ovn_start
+
+# (1) Create two load balancers, IPv4 and IPv6, attach them to a router that has a distributed gateway port.
+# (2) Set the gateway to an existing gateway - verify that all router flows are centralized (arp/nd).
+# (3) Change the gateway to a non-existent one, make one load balancer distributed - verify that all router flows for the router become distributed.
+# (4) Verify that flows for the distributed load balancer are distributed, and for the second load balancer are centralized.
+# (5) Make the second load balancer distributed, verify its flows.
+# (6) Remove the option from one load balancer and verify that the router stays distributed as long as at least one load balancer has the option.
+
+check ovn-nbctl ls-add outside
+
+check ovn-nbctl lsp-add outside outside \
+ -- lsp-set-addresses outside unknown \
+ -- lsp-set-type outside localnet
+
+check ovn-nbctl --wait=sb set Logical_Switch_Port outside tag_request=2
+
+check ovn-nbctl lsp-add outside outside-down \
+ -- lsp-set-type outside-down router \
+ -- lsp-set-addresses outside-down router \
+ -- lsp-set-options outside-down router-port=lr1-up
+
+check ovn-nbctl lr-add lr1 \
+ -- lrp-add lr1 lr1-up 11:11:11:11:11:11 169.254.0.1/24
2001:db8:abcd:0002::bad/64 \
+ -- lrp-add lr1 lr1-down 12:12:12:12:12:12 192.168.0.1/24
2001:db8:abcd:0001::c0fe/64
+
+check ovn-nbctl ls-add ls1 \
+ -- lsp-add ls1 lport1 \
+ -- lsp-set-addresses lport1 "13:13:13:13:13:13 192.168.0.101" \
+ -- lsp-add ls1 lport2 \
+ -- lsp-set-addresses lport2 "14:14:14:14:14:14 192.168.0.102"
+
+check ovn-nbctl lsp-add ls1 ls1-up \
+ -- lsp-set-type ls1-up router \
+ -- lsp-set-addresses ls1-up router \
+ -- lsp-set-options ls1-up router-port=lr1-down
+
+check ovn-nbctl --wait=sb sync
+
+check ovn-nbctl ha-chassis-group-add gateway
+check ovn-nbctl ha-chassis-group-add-chassis gateway hv1 1
+ha_g_uuid=$(fetch_column nb:HA_Chassis_Group _uuid name=gateway)
+lr1_up_uuid=$(fetch_column nb:Logical_Router_Port _uuid name=lr1-up)
+check ovn-nbctl set logical_router_port $lr1_up_uuid
ha_chassis_group=$ha_g_uuid
+
+check ovn-nbctl --wait=sb sync
+
+check ovn-nbctl lb-add lb1_ipv4 1.1.1.1:80
192.168.0.101:10880,192.168.0.102:10880
+check ovn-nbctl set Load_Balancer lb1_ipv4
ip_port_mappings:192.168.0.101=lport1:192.168.0.199
+check ovn-nbctl set Load_Balancer lb1_ipv4
ip_port_mappings:192.168.0.102=lport2:192.168.0.199
+check ovn-nbctl lr-lb-add lr1 lb1_ipv4
+check ovn-nbctl --wait=sb sync
+
+check ovn-nbctl lb-add lb1_ipv6 [[2000::1]]:80 [[2001:db8:abcd:1::2]]:10882
+check ovn-nbctl set Load_Balancer lb1_ipv6
ip_port_mappings:\"[[2001:db8:abcd:1::2]]\"=\"lport1\"
+check ovn-nbctl lr-lb-add lr1 lb1_ipv6
+check ovn-nbctl --wait=sb sync
+
+ovn-sbctl lflow-list lr1 > lr1_lflows_before
+ovn-sbctl lflow-list outside > outside_lflows_before
+
+AT_CHECK([cat outside_lflows_before | grep ls_in_l2_lkup | grep priority=50 |
ovn_strip_lflows], [0], [dnl
+ table=??(ls_in_l2_lkup ), priority=50 , match=(eth.dst ==
11:11:11:11:11:11 && is_chassis_resident("cr-lr1-up")), action=(outport =
"outside-down"; output;)
+])
+
+AT_CHECK([cat lr1_lflows_before | grep lr_in_ip_input | grep priority=90 |
grep 169.254.0.1 | ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_ip_input ), priority=90 , match=(inport == "lr1-up" &&
arp.op == 1 && arp.tpa == 169.254.0.1 && arp.spa == 169.254.0.0/24 &&
is_chassis_resident("cr-lr1-up")), action=(eth.dst = eth.src; eth.src =
xreg0[[0..47]]; arp.op = 2; /* ARP reply */ arp.tha = arp.sha; arp.sha =
xreg0[[0..47]]; arp.tpa <-> arp.spa; outport = inport; flags.loopback = 1;
output;)
+ table=??(lr_in_ip_input ), priority=90 , match=(ip4.dst == 169.254.0.1
&& icmp4.type == 8 && icmp4.code == 0), action=(ip4.dst <-> ip4.src; ip.ttl =
255; icmp4.type = 0; flags.loopback = 1; next; )
+])
+
+AT_CHECK([cat lr1_lflows_before | grep lr_in_ip_input | grep priority=90 |
grep 2001:db8:abcd:2::bad | ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_ip_input ), priority=90 , match=(inport == "lr1-up" &&
ip6.dst == {2001:db8:abcd:2::bad, ff02::1:ff00:bad} && nd_ns && nd.target ==
2001:db8:abcd:2::bad && is_chassis_resident("cr-lr1-up")), action=(nd_na_router
{ eth.src = xreg0[[0..47]]; ip6.src = nd.target; nd.tll = xreg0[[0..47]];
outport = inport; flags.loopback = 1; output; };)
+ table=??(lr_in_ip_input ), priority=90 , match=(ip6.dst ==
{2001:db8:abcd:2::bad, fe80::1311:11ff:fe11:1111} && icmp6.type == 128 &&
icmp6.code == 0), action=(ip6.dst <-> ip6.src; ip.ttl = 255; icmp6.type = 129;
flags.loopback = 1; next; )
+])
+
+AT_CHECK([cat lr1_lflows_before | grep lr_in_admission | grep priority=50 |
ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_admission ), priority=50 , match=(eth.dst ==
11:11:11:11:11:11 && inport == "lr1-up" && is_chassis_resident("cr-lr1-up")),
action=(xreg0[[0..47]] = 11:11:11:11:11:11; next;)
+ table=??(lr_in_admission ), priority=50 , match=(eth.dst ==
12:12:12:12:12:12 && inport == "lr1-down"), action=(xreg0[[0..47]] =
12:12:12:12:12:12; next;)
+ table=??(lr_in_admission ), priority=50 , match=(eth.mcast && inport ==
"lr1-down"), action=(xreg0[[0..47]] = 12:12:12:12:12:12; next;)
+ table=??(lr_in_admission ), priority=50 , match=(eth.mcast && inport ==
"lr1-up"), action=(xreg0[[0..47]] = 11:11:11:11:11:11; next;)
+])
+
+AT_CHECK([cat lr1_lflows_before | grep lr_out_undnat | grep priority=120 |
ovn_strip_lflows], [0], [dnl
+ table=??(lr_out_undnat ), priority=120 , match=(ip4 && ((ip4.src ==
192.168.0.101 && tcp.src == 10880) || (ip4.src == 192.168.0.102 && tcp.src ==
10880)) && (inport == "lr1-up" || outport == "lr1-up") &&
is_chassis_resident("cr-lr1-up")), action=(ct_dnat;)
+ table=??(lr_out_undnat ), priority=120 , match=(ip6 && ((ip6.src ==
2001:db8:abcd:1::2 && tcp.src == 10882)) && (inport == "lr1-up" || outport ==
"lr1-up") && is_chassis_resident("cr-lr1-up")), action=(ct_dnat;)
+])
+
+AT_CHECK([cat lr1_lflows_before | grep lr_in_gw_redirect | ovn_strip_lflows],
[0], [dnl
+ table=??(lr_in_gw_redirect ), priority=0 , match=(1), action=(next;)
+ table=??(lr_in_gw_redirect ), priority=200 , match=(ip4 && ((ip4.src ==
192.168.0.101 && tcp.src == 10880) || (ip4.src == 192.168.0.102 && tcp.src ==
10880)) && outport == "lr1-up"), action=(outport = "cr-lr1-up"; next;)
+ table=??(lr_in_gw_redirect ), priority=200 , match=(ip6 && ((ip6.src ==
2001:db8:abcd:1::2 && tcp.src == 10882)) && outport == "lr1-up"),
action=(outport = "cr-lr1-up"; next;)
+ table=??(lr_in_gw_redirect ), priority=50 , match=(outport == "lr1-up"),
action=(outport = "cr-lr1-up"; next;)
+])
+
+AT_CHECK([cat lr1_lflows_before | grep lr_in_dnat | grep priority=120 |
ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel &&
ip4 && ip4.dst == 1.1.1.1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80 &&
is_chassis_resident("cr-lr1-up")),
action=(ct_lb_mark(backends=192.168.0.101:10880,192.168.0.102:10880);)
+ table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel &&
ip6 && ip6.dst == 2000::1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80 &&
is_chassis_resident("cr-lr1-up")),
action=(ct_lb_mark(backends=[[2001:db8:abcd:1::2]]:10882);)
+])
+
+check ovn-nbctl clear logical_router_port $lr1_up_uuid ha_chassis_group
+check ovn-nbctl ha-chassis-group-del gateway
+check ovn-nbctl ha-chassis-group-add gateway2
+check ovn-nbctl ha-chassis-group-add-chassis gateway2 test 1
+ha_g_uuid=$(fetch_column nb:HA_Chassis_Group _uuid name=gateway2)
+lr1_up_uuid=$(fetch_column nb:Logical_Router_Port _uuid name=lr1-up)
+check ovn-nbctl set logical_router_port $lr1_up_uuid
ha_chassis_group=$ha_g_uuid
+
+check ovn-nbctl set load_balancer lb1_ipv4 options:distributed=true
+check ovn-nbctl --wait=hv sync
+
+ovn-sbctl lflow-list outside > outside_lflows_after
+ovn-sbctl lflow-list lr1 > lr1_lflows_after
+
+AT_CHECK([cat outside_lflows_after | grep ls_in_l2_lkup | grep priority=50 |
ovn_strip_lflows], [0], [dnl
+ table=??(ls_in_l2_lkup ), priority=50 , match=(eth.dst ==
11:11:11:11:11:11), action=(outport = "outside-down"; output;)
+])
+
+AT_CHECK([cat lr1_lflows_after | grep lr_in_ip_input | grep priority=90 | grep
169.254.0.1 | ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_ip_input ), priority=90 , match=(inport == "lr1-up" &&
arp.op == 1 && arp.tpa == 169.254.0.1 && arp.spa == 169.254.0.0/24),
action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply
*/ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport =
inport; flags.loopback = 1; output;)
+ table=??(lr_in_ip_input ), priority=90 , match=(ip4.dst == 169.254.0.1
&& icmp4.type == 8 && icmp4.code == 0), action=(ip4.dst <-> ip4.src; ip.ttl =
255; icmp4.type = 0; flags.loopback = 1; next; )
+])
+
+AT_CHECK([cat lr1_lflows_after | grep lr_in_ip_input | grep priority=90 | grep
2001:db8:abcd:2::bad | ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_ip_input ), priority=90 , match=(inport == "lr1-up" &&
ip6.dst == {2001:db8:abcd:2::bad, ff02::1:ff00:bad} && nd_ns && nd.target ==
2001:db8:abcd:2::bad), action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src
= nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1;
output; };)
+ table=??(lr_in_ip_input ), priority=90 , match=(ip6.dst ==
{2001:db8:abcd:2::bad, fe80::1311:11ff:fe11:1111} && icmp6.type == 128 &&
icmp6.code == 0), action=(ip6.dst <-> ip6.src; ip.ttl = 255; icmp6.type = 129;
flags.loopback = 1; next; )
+])
+
+AT_CHECK([cat lr1_lflows_after | grep lr_in_admission | grep priority=50 |
ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_admission ), priority=50 , match=(eth.dst ==
11:11:11:11:11:11 && inport == "lr1-up"), action=(xreg0[[0..47]] =
11:11:11:11:11:11; next;)
+ table=??(lr_in_admission ), priority=50 , match=(eth.dst ==
12:12:12:12:12:12 && inport == "lr1-down"), action=(xreg0[[0..47]] =
12:12:12:12:12:12; next;)
+ table=??(lr_in_admission ), priority=50 , match=(eth.mcast && inport ==
"lr1-down"), action=(xreg0[[0..47]] = 12:12:12:12:12:12; next;)
+ table=??(lr_in_admission ), priority=50 , match=(eth.mcast && inport ==
"lr1-up"), action=(xreg0[[0..47]] = 11:11:11:11:11:11; next;)
+])
+
+AT_CHECK([cat lr1_lflows_after | grep lr_out_undnat | grep priority=120 |
ovn_strip_lflows], [0], [dnl
+ table=??(lr_out_undnat ), priority=120 , match=(ip4 && ((ip4.src ==
192.168.0.101 && tcp.src == 10880) || (ip4.src == 192.168.0.102 && tcp.src ==
10880)) && (inport == "lr1-up" || outport == "lr1-up")), action=(ct_dnat;)
+ table=??(lr_out_undnat ), priority=120 , match=(ip6 && ((ip6.src ==
2001:db8:abcd:1::2 && tcp.src == 10882)) && (inport == "lr1-up" || outport ==
"lr1-up") && is_chassis_resident("cr-lr1-up")), action=(ct_dnat;)
+])
+
+AT_CHECK([cat lr1_lflows_after | grep lr_in_gw_redirect | ovn_strip_lflows],
[0], [dnl
+ table=??(lr_in_gw_redirect ), priority=0 , match=(1), action=(next;)
+ table=??(lr_in_gw_redirect ), priority=200 , match=(ip4 && ((ip4.src ==
192.168.0.101 && tcp.src == 10880) || (ip4.src == 192.168.0.102 && tcp.src ==
10880)) && outport == "lr1-up"), action=(outport = "lr1-up"; next;)
+ table=??(lr_in_gw_redirect ), priority=200 , match=(ip6 && ((ip6.src ==
2001:db8:abcd:1::2 && tcp.src == 10882)) && outport == "lr1-up"),
action=(outport = "cr-lr1-up"; next;)
+ table=??(lr_in_gw_redirect ), priority=50 , match=(outport == "lr1-up"),
action=(outport = "cr-lr1-up"; next;)
+])
+
+AT_CHECK([cat lr1_lflows_after | grep lr_in_dnat | grep priority=120 |
ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel &&
ip4 && ip4.dst == 1.1.1.1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80),
action=(ct_lb_mark_local(backends="lport1":192.168.0.101:10880,"lport2":192.168.0.102:10880);)
+ table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel &&
ip6 && ip6.dst == 2000::1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80 &&
is_chassis_resident("cr-lr1-up")),
action=(ct_lb_mark(backends=[[2001:db8:abcd:1::2]]:10882);)
+])
+
+check ovn-nbctl set load_balancer lb1_ipv6 options:distributed=true
+check ovn-nbctl --wait=hv sync
+
+ovn-sbctl lflow-list outside > outside_lflows_after
+ovn-sbctl lflow-list lr1 > lr1_lflows_after
+
+AT_CHECK([cat lr1_lflows_after | grep lr_out_undnat | grep priority=120 |
ovn_strip_lflows], [0], [dnl
+ table=??(lr_out_undnat ), priority=120 , match=(ip4 && ((ip4.src ==
192.168.0.101 && tcp.src == 10880) || (ip4.src == 192.168.0.102 && tcp.src ==
10880)) && (inport == "lr1-up" || outport == "lr1-up")), action=(ct_dnat;)
+ table=??(lr_out_undnat ), priority=120 , match=(ip6 && ((ip6.src ==
2001:db8:abcd:1::2 && tcp.src == 10882)) && (inport == "lr1-up" || outport ==
"lr1-up")), action=(ct_dnat;)
+])
+
+AT_CHECK([cat lr1_lflows_after | grep lr_in_gw_redirect | ovn_strip_lflows],
[0], [dnl
+ table=??(lr_in_gw_redirect ), priority=0 , match=(1), action=(next;)
+ table=??(lr_in_gw_redirect ), priority=200 , match=(ip4 && ((ip4.src ==
192.168.0.101 && tcp.src == 10880) || (ip4.src == 192.168.0.102 && tcp.src ==
10880)) && outport == "lr1-up"), action=(outport = "lr1-up"; next;)
+ table=??(lr_in_gw_redirect ), priority=200 , match=(ip6 && ((ip6.src ==
2001:db8:abcd:1::2 && tcp.src == 10882)) && outport == "lr1-up"),
action=(outport = "lr1-up"; next;)
+ table=??(lr_in_gw_redirect ), priority=50 , match=(outport == "lr1-up"),
action=(outport = "cr-lr1-up"; next;)
+])
+
+AT_CHECK([cat lr1_lflows_after | grep lr_in_dnat | grep priority=120 |
ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel &&
ip4 && ip4.dst == 1.1.1.1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80),
action=(ct_lb_mark_local(backends="lport1":192.168.0.101:10880,"lport2":192.168.0.102:10880);)
+ table=??(lr_in_dnat ), priority=120 , match=(ct.new && !ct.rel &&
ip6 && ip6.dst == 2000::1 && reg1[[16..23]] == 6 && reg1[[0..15]] == 80),
action=(ct_lb_mark_local(backends="lport1":[[2001:db8:abcd:1::2]]:10882);)
+])
+
+check ovn-nbctl set load_balancer lb1_ipv6 options:distributed=false
+check ovn-nbctl --wait=hv sync
+
+AT_CHECK([cat outside_lflows_after | grep ls_in_l2_lkup | grep priority=50 |
ovn_strip_lflows], [0], [dnl
+ table=??(ls_in_l2_lkup ), priority=50 , match=(eth.dst ==
11:11:11:11:11:11), action=(outport = "outside-down"; output;)
+])
+
+AT_CHECK([cat lr1_lflows_after | grep lr_in_ip_input | grep priority=90 | grep
169.254.0.1 | ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_ip_input ), priority=90 , match=(inport == "lr1-up" &&
arp.op == 1 && arp.tpa == 169.254.0.1 && arp.spa == 169.254.0.0/24),
action=(eth.dst = eth.src; eth.src = xreg0[[0..47]]; arp.op = 2; /* ARP reply
*/ arp.tha = arp.sha; arp.sha = xreg0[[0..47]]; arp.tpa <-> arp.spa; outport =
inport; flags.loopback = 1; output;)
+ table=??(lr_in_ip_input ), priority=90 , match=(ip4.dst == 169.254.0.1
&& icmp4.type == 8 && icmp4.code == 0), action=(ip4.dst <-> ip4.src; ip.ttl =
255; icmp4.type = 0; flags.loopback = 1; next; )
+])
+
+AT_CHECK([cat lr1_lflows_after | grep lr_in_ip_input | grep priority=90 | grep
2001:db8:abcd:2::bad | ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_ip_input ), priority=90 , match=(inport == "lr1-up" &&
ip6.dst == {2001:db8:abcd:2::bad, ff02::1:ff00:bad} && nd_ns && nd.target ==
2001:db8:abcd:2::bad), action=(nd_na_router { eth.src = xreg0[[0..47]]; ip6.src
= nd.target; nd.tll = xreg0[[0..47]]; outport = inport; flags.loopback = 1;
output; };)
+ table=??(lr_in_ip_input ), priority=90 , match=(ip6.dst ==
{2001:db8:abcd:2::bad, fe80::1311:11ff:fe11:1111} && icmp6.type == 128 &&
icmp6.code == 0), action=(ip6.dst <-> ip6.src; ip.ttl = 255; icmp6.type = 129;
flags.loopback = 1; next; )
+])
+
+AT_CHECK([cat lr1_lflows_after | grep lr_in_admission | grep priority=50 |
ovn_strip_lflows], [0], [dnl
+ table=??(lr_in_admission ), priority=50 , match=(eth.dst ==
11:11:11:11:11:11 && inport == "lr1-up"), action=(xreg0[[0..47]] =
11:11:11:11:11:11; next;)
+ table=??(lr_in_admission ), priority=50 , match=(eth.dst ==
12:12:12:12:12:12 && inport == "lr1-down"), action=(xreg0[[0..47]] =
12:12:12:12:12:12; next;)
+ table=??(lr_in_admission ), priority=50 , match=(eth.mcast && inport ==
"lr1-down"), action=(xreg0[[0..47]] = 12:12:12:12:12:12; next;)
+ table=??(lr_in_admission ), priority=50 , match=(eth.mcast && inport ==
"lr1-up"), action=(xreg0[[0..47]] = 11:11:11:11:11:11; next;)
+])
+
+AT_CLEANUP
+])
+
OVN_FOR_EACH_NORTHD_NO_HV([
AT_SETUP([Unicast ARP flows])
ovn_start
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 373a87657..8bd3a3b2b 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -18976,3 +18976,248 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/.*error receiving.*/d
/.*terminating with signal 15.*/d"])
AT_CLEANUP
])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([Distributed Load Balancer: IPv4 case])
+AT_KEYWORDS([ovnlb])
+
+# Simple Test for Basic Functionality Verification:
+# client - br-ext - br-int - outside-switch - (distributed port) lr - backend
+# test case:
+# 1. Create a centralized IPv4 load balancer, specifying a gateway chassis for the router.
+# 2. Move the gateway to a non-existent chassis.
+# 3. Enable the distributed option on the load balancer.
+# 4. The distributed load balancer is expected to continue working.
+
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-int])
+ADD_BR([br-ext])
+
+check ovs-ofctl add-flow br-ext action=normal
+# Set external-ids in br-int needed for ovn-controller
+check ovs-vsctl \
+ -- set Open_vSwitch . external-ids:system-id=hv1 \
+ -- set Open_vSwitch .
external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+ -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+ -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+ -- set bridge br-int fail-mode=secure
other-config:disable-in-band=true \
+ -- set Open_vSwitch . external-ids:ovn-bridge-mappings=phynet:br-ext
+
+# Start ovn-controller
+start_daemon ovn-controller
+
+check ovn-nbctl lr-add lr
+check ovn-nbctl ls-add internal
+check ovn-nbctl ls-add public
+
+check ovn-nbctl lrp-add lr lr-pub 00:00:01:01:02:03 192.168.100.1/24
+check ovn-nbctl lsp-add public pub-lr -- set Logical_Switch_Port pub-lr \
+ type=router options:router-port=lr-pub addresses=\"00:00:01:01:02:03\"
+
+check ovn-nbctl lrp-add lr lr-internal 00:00:01:01:02:04 192.168.200.1/24
+check ovn-nbctl lsp-add internal internal-lr -- set Logical_Switch_Port
internal-lr \
+ type=router options:router-port=lr-internal addresses=\"00:00:01:01:02:04\"
+
+check ovn-nbctl lsp-add internal server_ipv4 -- lsp-set-addresses server_ipv4
"unknown"
+
+check ovn-nbctl lsp-add public ln_port \
+ -- lsp-set-addresses ln_port unknown \
+ -- lsp-set-type ln_port localnet \
+ -- lsp-set-options ln_port network_name=phynet
+
+check ovn-nbctl lrp-set-gateway-chassis lr-pub hv1
+
+check ovn-nbctl lb-add lb1_ipv4 1.1.1.1:80 192.168.200.10:10880
+check ovn-nbctl lr-lb-add lr lb1_ipv4
+
+check ovn-nbctl --wait=hv sync
+
+ADD_NAMESPACES(client_ipv4)
+ADD_VETH(client_ipv4, client_ipv4, br-ext, "192.168.100.10/24",
"f0:00:00:01:02:03", \
+ "192.168.100.1")
+
+ADD_NAMESPACES(server_ipv4)
+ADD_VETH(server_ipv4, server_ipv4, br-int, "192.168.200.10/24",
"f0:00:0f:01:02:03", \
+ "192.168.200.1")
+
+NETNS_DAEMONIZE([server_ipv4], [nc -l -k 192.168.200.10 10880], [serverv4.pid])
+
+# Checking backend availability.
+NS_CHECK_EXEC([client_ipv4], [nc 1.1.1.1 80 -z], [0], [ignore], [ignore])
+
+# Changing the gateway to a non-existent one.
+check ovn-nbctl clear logical_router_port lr-pub gateway_chassis
+check ovn-nbctl lrp-set-gateway-chassis lr-pub hv2
+
+# ovn-controller currently does not recalculate local datapaths
+# when 'ha_chassis_group' changes, so we restart it.
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+start_daemon ovn-controller
+wait_for_ports_up
+
+# Check public switch not in local datapaths
+AT_CHECK([ovn-appctl -t ovn-controller debug/dump-local-datapaths], [0], [dnl
+Local datapaths:
+Datapath: lr, type: router
+Datapath: internal, type: switch
+])
+
+AT_CHECK([ovs-ofctl dump-groups br-int | sed -e
's/table=[[0-9]]*/table=<cleared>/'], [0], [dnl
+NXST_GROUP_DESC reply (xid=0x2):
+])
+
+check ovn-nbctl set load_balancer lb1_ipv4 options:distributed=true
+check ovn-nbctl set Load_Balancer lb1_ipv4
ip_port_mappings:192.168.200.10=server_ipv4
+check ovn-nbctl --wait=hv sync
+
+# Check that the external switch has been added to local datapaths on distributed nodes
+# when the 'distributed' option is enabled on the load balancer.
+AT_CHECK([ovn-appctl -t ovn-controller debug/dump-local-datapaths], [0], [dnl
+Local datapaths:
+Datapath: lr, type: router
+Datapath: internal, type: switch
+Datapath: public, type: switch
+])
+
+AT_CHECK([ovs-ofctl dump-groups br-int | sed -e
's/table=[[0-9]]*/table=<cleared>/'], [0], [dnl
+NXST_GROUP_DESC reply (xid=0x2):
+
group_id=1,type=select,selection_method=dp_hash,bucket=bucket_id:0,weight:100,actions=ct(commit,table=<cleared>,zone=NXM_NX_REG11[[0..15]],nat(dst=192.168.200.10:10880),exec(load:0x1->NXM_NX_CT_MARK[[1]]))
+])
+
+# Checking backend availability.
+NS_CHECK_EXEC([client_ipv4], [nc 1.1.1.1 80 -z], [0], [ignore], [ignore])
+
+check ovn-nbctl lb-del lb1_ipv4
+
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
+/connection dropped.*/d"])
+AT_CLEANUP
+])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([Distributed Load Balancer: IPv6 case])
+AT_KEYWORDS([ovnlb])
+
+# Simple Test for Basic Functionality Verification:
+# client - br-ext - br-int - outside-switch - (distributed port) lr - backend
+# test case:
+# 1. Create a centralized IPv6 load balancer, specifying a gateway chassis for the router.
+# 2. Move the gateway to a non-existent chassis.
+# 3. Enable the distributed option on the load balancer.
+# 4. The distributed load balancer is expected to continue working.
+
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-int])
+ADD_BR([br-ext])
+
+check ovs-ofctl add-flow br-ext action=normal
+# Set external-ids in br-int needed for ovn-controller
+check ovs-vsctl \
+ -- set Open_vSwitch . external-ids:system-id=hv1 \
+ -- set Open_vSwitch .
external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+ -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+ -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+ -- set bridge br-int fail-mode=secure
other-config:disable-in-band=true \
+ -- set Open_vSwitch . external-ids:ovn-bridge-mappings=phynet:br-ext
+
+# Start ovn-controller
+start_daemon ovn-controller
+
+check ovn-nbctl lr-add lr
+check ovn-nbctl ls-add internal
+check ovn-nbctl ls-add public
+
+check ovn-nbctl lrp-add lr lr-pub 00:00:01:01:02:03 2001:db8:abcd:0002::bad/64
+check ovn-nbctl lsp-add public pub-lr -- set Logical_Switch_Port pub-lr \
+ type=router options:router-port=lr-pub addresses=\"00:00:01:01:02:03\"
+
+check ovn-nbctl lrp-add lr lr-internal 00:00:01:01:02:04
2001:db8:abcd:0001::c0fe/64
+check ovn-nbctl lsp-add internal internal-lr -- set Logical_Switch_Port
internal-lr \
+ type=router options:router-port=lr-internal addresses=\"00:00:01:01:02:04\"
+
+check ovn-nbctl lsp-add internal server_ipv6 -- lsp-set-addresses server_ipv6
"unknown"
+
+check ovn-nbctl lsp-add public ln_port \
+ -- lsp-set-addresses ln_port unknown \
+ -- lsp-set-type ln_port localnet \
+ -- lsp-set-options ln_port network_name=phynet
+
+check ovn-nbctl lrp-set-gateway-chassis lr-pub hv1
+
+check ovn-nbctl lb-add lb1_ipv6 [[2000::1]]:80 [[2001:db8:abcd:1::2]]:10882
+check ovn-nbctl lr-lb-add lr lb1_ipv6
+
+check ovn-nbctl --wait=hv sync
+
+ADD_NAMESPACES(client_ipv6)
+ADD_VETH(client_ipv6, client_ipv6, br-ext, "2001:db8:abcd:2::f00d/64",
"f0:00:00:01:02:06", \
+ "2001:db8:abcd:0002::bad")
+
+ADD_NAMESPACES(server_ipv6)
+ADD_VETH(server_ipv6, server_ipv6, br-int, "2001:db8:abcd:1::2/64",
"f0:00:0f:01:02:04", \
+ "2001:db8:abcd:1::c0fe")
+
+# Wait for IPv6 address to be ready
+sleep 5
+
+NETNS_DAEMONIZE([server_ipv6], [ncat -6 -l -k 2001:db8:abcd:1::2 10882],
[serverv6.pid])
+
+# Checking backend availability.
+NS_CHECK_EXEC([client_ipv6], [nc -6 2000::1 80 -z], [0], [ignore], [ignore])
+
+# Changing the gateway to a non-existent one.
+check ovn-nbctl clear logical_router_port lr-pub gateway_chassis
+check ovn-nbctl lrp-set-gateway-chassis lr-pub hv2
+
+# ovn-controller currently does not recalculate local datapaths
+# when 'ha_chassis_group' changes, so we restart it.
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+start_daemon ovn-controller
+wait_for_ports_up
+
+# Check public switch not in local datapaths
+AT_CHECK([ovn-appctl -t ovn-controller debug/dump-local-datapaths], [0], [dnl
+Local datapaths:
+Datapath: lr, type: router
+Datapath: internal, type: switch
+])
+
+AT_CHECK([ovs-ofctl dump-groups br-int | sed -e
's/table=[[0-9]]*/table=<cleared>/'], [0], [dnl
+NXST_GROUP_DESC reply (xid=0x2):
+])
+
+check ovn-nbctl set load_balancer lb1_ipv6 options:distributed=true
+check ovn-nbctl set Load_Balancer lb1_ipv6
ip_port_mappings:\"[[2001:db8:abcd:1::2]]\"=\"server_ipv6\"
+check ovn-nbctl --wait=hv sync
+
+# Check that the external switch has been added to local datapaths on distributed nodes
+# when the 'distributed' option is enabled on the load balancer.
+AT_CHECK([ovn-appctl -t ovn-controller debug/dump-local-datapaths], [0], [dnl
+Local datapaths:
+Datapath: lr, type: router
+Datapath: internal, type: switch
+Datapath: public, type: switch
+])
+
+AT_CHECK([ovs-ofctl dump-groups br-int | sed -e
's/table=[[0-9]]*/table=<cleared>/'], [0], [dnl
+NXST_GROUP_DESC reply (xid=0x2):
+
group_id=1,type=select,selection_method=dp_hash,bucket=bucket_id:0,weight:100,actions=ct(commit,table=<cleared>,zone=NXM_NX_REG11[[0..15]],nat(dst=[[2001:db8:abcd:1::2]]:10882),exec(load:0x1->NXM_NX_CT_MARK[[1]]))
+])
+
+# Checking backend availability.
+NS_CHECK_EXEC([client_ipv6], [nc -6 2000::1 80 -z], [0], [ignore], [ignore])
+
+check ovn-nbctl lb-del lb1_ipv6
+
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
+/connection dropped.*/d"])
+AT_CLEANUP
+])
--
2.48.1
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev