+
#define MAX_OVN_TAGS 4096
/* Pipeline stages. */
@@ -2935,6 +2942,294 @@ cleanup_sb_ha_chassis_groups(struct northd_context *ctx,
}
}
/* A northbound Load_Balancer row as parsed by northd.  Stored in an hmap
 * hashed on the UUID of the NB row (see ovn_lb_find()). */
struct ovn_lb {
    struct hmap_node hmap_node;

    const struct nbrec_load_balancer *nlb; /* May be NULL. */

    struct lb_vip *vips;        /* One entry per parsed VIP in nlb->vips. */
    size_t n_vips;
};

/* One virtual IP (plus optional L4 port) of a load balancer. */
struct lb_vip {
    char *vip;                  /* VIP address (IPv4 or IPv6), heap-owned. */
    uint16_t vip_port;          /* L4 port, or 0 if none was configured. */
    int addr_family;            /* AF_INET or AF_INET6. */
    char *backend_ips;          /* Raw "IP1:port1,IP2:port2,..." value. */

    bool health_check;          /* True if a Load_Balancer_Health_Check row
                                 * references this VIP. */
    struct lb_vip_backend *backends;
    size_t n_backends;
};

/* One backend (endpoint) of a VIP. */
struct lb_vip_backend {
    char *ip;                   /* Backend address, heap-owned. */
    uint16_t port;

    struct ovn_port *op; /* Logical port to which the IP belongs. */
    bool health_check;
    char *svc_mon_src_ip; /* Source IP to use for monitoring. */
    const struct sbrec_service_monitor *sbrec_monitor;
};
+
+
+static inline struct ovn_lb *
+ovn_lb_find(struct hmap *lbs, struct uuid *uuid)
+{
+ struct ovn_lb *lb;
+ size_t hash = uuid_hash(uuid);
+ HMAP_FOR_EACH_WITH_HASH (lb, hmap_node, hash, lbs) {
+ if (uuid_equals(&lb->nlb->header_.uuid, uuid)) {
+ return lb;
+ }
+ }
+
+ return NULL;
+}
+
+
/* Pairs a southbound Service_Monitor row with a "still needed" flag.
 * build_ovn_lbs() preloads every existing row with 'required' == false;
 * rows that no load balancer backend claims are deleted afterwards. */
struct service_monitor_info {
    struct hmap_node hmap_node;
    const struct sbrec_service_monitor *sbrec_mon;
    bool required;
};
+
+
+static struct service_monitor_info *
+create_or_get_service_mon(struct northd_context *ctx,
+ struct hmap *monitor_map,
+ const char *ip, const char *logical_port,
+ uint16_t service_port, const char *protocol)
+{
+ uint32_t hash = service_port;
+ hash = hash_string(ip, hash);
+ hash = hash_string(logical_port, hash);
+ struct service_monitor_info *mon_info;
+
+ HMAP_FOR_EACH_WITH_HASH (mon_info, hmap_node, hash, monitor_map) {
+ if (mon_info->sbrec_mon->port == service_port &&
+ !strcmp(mon_info->sbrec_mon->ip, ip) &&
+ !strcmp(mon_info->sbrec_mon->protocol, protocol) &&
+ !strcmp(mon_info->sbrec_mon->logical_port, logical_port)) {
+ return mon_info;
+ }
+ }
+
+ struct sbrec_service_monitor *sbrec_mon =
+ sbrec_service_monitor_insert(ctx->ovnsb_txn);
+ sbrec_service_monitor_set_ip(sbrec_mon, ip);
+ sbrec_service_monitor_set_port(sbrec_mon, service_port);
+ sbrec_service_monitor_set_logical_port(sbrec_mon, logical_port);
+ sbrec_service_monitor_set_protocol(sbrec_mon, protocol);
+ mon_info = xzalloc(sizeof *mon_info);
+ mon_info->sbrec_mon = sbrec_mon;
+ hmap_insert(monitor_map, &mon_info->hmap_node, hash);
+ return mon_info;
+}
+
+static struct ovn_lb *
+ovn_lb_create(struct northd_context *ctx, struct hmap *lbs,
+ const struct nbrec_load_balancer *nbrec_lb,
+ struct hmap *ports, struct hmap *monitor_map)
+{
+ struct ovn_lb *lb = xzalloc(sizeof *lb);
+
+ size_t hash = uuid_hash(&nbrec_lb->header_.uuid);
+ lb->nlb = nbrec_lb;
+ hmap_insert(lbs, &lb->hmap_node, hash);
+
+ lb->n_vips = smap_count(&nbrec_lb->vips);
+ lb->vips = xcalloc(lb->n_vips, sizeof (struct lb_vip));
+ struct smap_node *node;
+ size_t n_vips = 0;
+
+ SMAP_FOR_EACH (node, &nbrec_lb->vips) {
+ char *vip = NULL;
+ uint16_t port;
+ int addr_family;
+
+ ip_address_and_port_from_lb_key(node->key, &vip, &port,
+ &addr_family);
+ if (!vip) {
+ continue;
+ }
+
+ lb->vips[n_vips].vip = vip;
+ lb->vips[n_vips].vip_port = port;
+ lb->vips[n_vips].addr_family = addr_family;
+ lb->vips[n_vips].backend_ips = xstrdup(node->value);
+
+ struct nbrec_load_balancer_health_check *lb_health_check = NULL;
+ for (size_t i = 0; i < nbrec_lb->n_health_check; i++) {
+ if (!strcmp(nbrec_lb->health_check[i]->vip, node->key)) {
+ lb_health_check = nbrec_lb->health_check[i];
+ break;
+ }
+ }
+
+ char *tokstr = xstrdup(node->value);
+ char *save_ptr = NULL;
+ char *token;
+ size_t n_backends = 0;
+ /* Format for a backend ips : IP1:port1,IP2:port2,...". */
+ for (token = strtok_r(tokstr, ",", &save_ptr);
+ token != NULL;
+ token = strtok_r(NULL, ",", &save_ptr)) {
+ n_backends++;
+ }
+
+ free(tokstr);
+ tokstr = xstrdup(node->value);
+ save_ptr = NULL;
+
+ lb->vips[n_vips].n_backends = n_backends;
+ lb->vips[n_vips].backends = xcalloc(n_backends,
+ sizeof (struct lb_vip_backend));
+ lb->vips[n_vips].health_check = lb_health_check ? true: false;
+
+ size_t i = 0;
+ for (token = strtok_r(tokstr, ",", &save_ptr);
+ token != NULL;
+ token = strtok_r(NULL, ",", &save_ptr)) {
+ char *backend_ip;
+ uint16_t backend_port;
+
+ ip_address_and_port_from_lb_key(token, &backend_ip, &backend_port,
+ &addr_family);
+
+ if (!backend_ip) {
+ continue;
+ }
+
+ /* Get the logical port to which this ip belongs to. */
+ struct ovn_port *op = NULL;
+ char *svc_mon_src_ip = NULL;
+ const char *s = smap_get(&nbrec_lb->ip_port_mappings,
+ backend_ip);
+ if (s) {
+ char *port_name = xstrdup(s);
+ char *p = strstr(port_name, ":");
+ if (p) {
+ *p = 0;
+ p++;
+ op = ovn_port_find(ports, port_name);
+ svc_mon_src_ip = xstrdup(p);
+ }
+ free(port_name);
+ }
+
+ lb->vips[n_vips].backends[i].ip = backend_ip;
+ lb->vips[n_vips].backends[i].port = backend_port;
+ lb->vips[n_vips].backends[i].op = op;
+ lb->vips[n_vips].backends[i].svc_mon_src_ip = svc_mon_src_ip;
+
+ if (lb_health_check && op && svc_mon_src_ip) {
+ const char *protocol = nbrec_lb->protocol;
+ if (!protocol || !protocol[0]) {
+ protocol = "tcp";
+ }
+ lb->vips[n_vips].backends[i].health_check = true;
+ struct service_monitor_info *mon_info =
+ create_or_get_service_mon(ctx, monitor_map, backend_ip,
+ op->nbsp->name, backend_port,
+ protocol);
+
+ ovs_assert(mon_info);
+ sbrec_service_monitor_set_options(
+ mon_info->sbrec_mon, &lb_health_check->options);
+ char *monitor_src_mac = xasprintf(ETH_ADDR_FMT,
+ ETH_ADDR_ARGS(svc_monitor_mac));
+ if (!mon_info->sbrec_mon->src_mac ||
+ strcmp(mon_info->sbrec_mon->src_mac, monitor_src_mac)) {
+ sbrec_service_monitor_set_src_mac(mon_info->sbrec_mon,
+ monitor_src_mac);
+ }
+ free(monitor_src_mac);
+
+ if (!mon_info->sbrec_mon->src_ip ||
+ strcmp(mon_info->sbrec_mon->src_ip, svc_mon_src_ip)) {
+ sbrec_service_monitor_set_src_ip(mon_info->sbrec_mon,
+ svc_mon_src_ip);
+ }
+
+ lb->vips[n_vips].backends[i].sbrec_monitor =
+ mon_info->sbrec_mon;
+ mon_info->required = true;
+ } else {
+ lb->vips[n_vips].backends[i].health_check = false;
+ }
+
+ i++;
+ }
+
+ free(tokstr);
+ n_vips++;
+ }
+
+ return lb;
+}
+
+static void
+ovn_lb_destroy(struct ovn_lb *lb)
+{
+ for (size_t i = 0; i < lb->n_vips; i++) {
+ free(lb->vips[i].vip);
+ free(lb->vips[i].backend_ips);
+
+ for (size_t j = 0; j < lb->vips[i].n_backends; j++) {
+ free(lb->vips[i].backends[j].ip);
+ free(lb->vips[i].backends[j].svc_mon_src_ip);
+ }
+
+ free(lb->vips[i].backends);
+ }
+ free(lb->vips);
+}
+
/* Fills 'lbs' with a "struct ovn_lb" for every northbound Load_Balancer
 * row and synchronizes the southbound Service_Monitor table:
 *
 *   1. Every existing SB Service_Monitor row is loaded into a temporary
 *      map, initially marked as not required.
 *   2. ovn_lb_create() reuses or creates monitor rows for backends with
 *      health checks enabled, marking them required.
 *   3. Rows still not required afterwards are stale and are deleted.
 *
 * The caller owns 'lbs' and must release it with destroy_ovn_lbs()
 * followed by hmap_destroy(). */
static void
build_ovn_lbs(struct northd_context *ctx, struct hmap *ports,
              struct hmap *lbs)
{
    hmap_init(lbs);
    struct hmap monitor_map = HMAP_INITIALIZER(&monitor_map);

    const struct sbrec_service_monitor *sbrec_mon;
    SBREC_SERVICE_MONITOR_FOR_EACH (sbrec_mon, ctx->ovnsb_idl) {
        /* Hash on (port, ip, logical_port); must match the computation in
         * create_or_get_service_mon(). */
        uint32_t hash = sbrec_mon->port;
        hash = hash_string(sbrec_mon->ip, hash);
        hash = hash_string(sbrec_mon->logical_port, hash);
        struct service_monitor_info *mon_info = xzalloc(sizeof *mon_info);
        mon_info->sbrec_mon = sbrec_mon;
        mon_info->required = false;
        hmap_insert(&monitor_map, &mon_info->hmap_node, hash);
    }

    const struct nbrec_load_balancer *nbrec_lb;
    NBREC_LOAD_BALANCER_FOR_EACH (nbrec_lb, ctx->ovnnb_idl) {
        ovn_lb_create(ctx, lbs, nbrec_lb, ports, &monitor_map);
    }

    /* Delete the southbound rows that no load balancer claimed. */
    struct service_monitor_info *mon_info;
    HMAP_FOR_EACH_POP (mon_info, hmap_node, &monitor_map) {
        if (!mon_info->required) {
            sbrec_service_monitor_delete(mon_info->sbrec_mon);
        }

        free(mon_info);
    }
    hmap_destroy(&monitor_map);
}
+
/* Frees every "struct ovn_lb" in 'lbs' (as built by build_ovn_lbs());
 * the caller is still responsible for hmap_destroy(lbs). */
static void
destroy_ovn_lbs(struct hmap *lbs)
{
    struct ovn_lb *lb;
    HMAP_FOR_EACH_POP (lb, hmap_node, lbs) {
        ovn_lb_destroy(lb);
        free(lb);
    }
}
+
/* Updates the southbound Port_Binding table so that it contains the logical
* switch ports specified by the northbound database.
*
@@ -4314,6 +4609,15 @@ build_pre_lb(struct ovn_datapath *od, struct hmap
*lflows,
ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110,
"nd || nd_rs || nd_ra", "next;");
+ /* Do not send service monitor packets to conntrack. */
+ char *svc_check_match = xasprintf("eth.src == "ETH_ADDR_FMT,
+ ETH_ADDR_ARGS(svc_monitor_mac));
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110,
+ svc_check_match, "next;");
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110,
+ svc_check_match, "next;");
+ free(svc_check_match);
+
/* Allow all packets to go to next tables by default. */
ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
@@ -4981,7 +5285,7 @@ build_lb(struct ovn_datapath *od, struct hmap *lflows)
}
static void
-build_stateful(struct ovn_datapath *od, struct hmap *lflows)
+build_stateful(struct ovn_datapath *od, struct hmap *lflows, struct hmap *lbs)
{
/* Ingress and Egress stateful Table (Priority 0): Packets are
* allowed by default. */
@@ -5015,47 +5319,69 @@ build_stateful(struct ovn_datapath *od, struct hmap
*lflows)
* connection, so it is okay if we do not hit the above match on
* REGBIT_CONNTRACK_COMMIT. */
for (int i = 0; i < od->nbs->n_load_balancer; i++) {
- struct nbrec_load_balancer *lb = od->nbs->load_balancer[i];
- struct smap *vips = &lb->vips;
- struct smap_node *node;
+ struct ovn_lb *lb =
+ ovn_lb_find(lbs, &od->nbs->load_balancer[i]->header_.uuid);
+ ovs_assert(lb);
- SMAP_FOR_EACH (node, vips) {
- uint16_t port = 0;
- int addr_family;
+ for (size_t j = 0; j < lb->n_vips; j++) {
+ struct lb_vip *lb_vip = &lb->vips[j];
+ /* New connections in Ingress table. */
+ struct ds action = DS_EMPTY_INITIALIZER;
+ if (lb_vip->health_check) {
+ ds_put_cstr(&action, "ct_lb(");
+
+ size_t n_active_backends = 0;
+ for (size_t k = 0; k < lb_vip->n_backends; k++) {
+ struct lb_vip_backend *backend = &lb_vip->backends[k];
+ bool is_up = true;
+ if (backend->health_check && backend->sbrec_monitor &&
+ backend->sbrec_monitor->status &&
+ strcmp(backend->sbrec_monitor->status, "online")) {
+ is_up = false;
+ }
- /* node->key contains IP:port or just IP. */
- char *ip_address = NULL;
- ip_address_and_port_from_lb_key(node->key, &ip_address, &port,
- &addr_family);
- if (!ip_address) {
- continue;
+ if (is_up) {
+ n_active_backends++;
+ ds_put_format(&action, "%s:%"PRIu16",",
+ backend->ip, backend->port);
+ }
+ }
+
+ if (!n_active_backends) {
+ ds_clear(&action);
+ ds_put_cstr(&action, "drop;");
+ } else {
+ ds_chomp(&action, ',');
+ ds_put_cstr(&action, ");");
+ }
+ } else {
+ ds_put_format(&action, "ct_lb(%s);", lb_vip->backend_ips);
}
- /* New connections in Ingress table. */
- char *action = xasprintf("ct_lb(%s);", node->value);
struct ds match = DS_EMPTY_INITIALIZER;
- if (addr_family == AF_INET) {
- ds_put_format(&match, "ct.new && ip4.dst == %s", ip_address);
+ if (lb_vip->addr_family == AF_INET) {
+ ds_put_format(&match, "ct.new && ip4.dst == %s", lb_vip->vip);
} else {
- ds_put_format(&match, "ct.new && ip6.dst == %s", ip_address);
+ ds_put_format(&match, "ct.new && ip6.dst == %s", lb_vip->vip);
}
- if (port) {
- if (lb->protocol && !strcmp(lb->protocol, "udp")) {
- ds_put_format(&match, " && udp.dst == %d", port);
+ if (lb_vip->vip_port) {
+ if (lb->nlb->protocol && !strcmp(lb->nlb->protocol, "udp")) {
+ ds_put_format(&match, " && udp.dst == %d",
+ lb_vip->vip_port);
} else {
- ds_put_format(&match, " && tcp.dst == %d", port);
+ ds_put_format(&match, " && tcp.dst == %d",
+ lb_vip->vip_port);
}
ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
- 120, ds_cstr(&match), action);
+ 120, ds_cstr(&match), ds_cstr(&action));
} else {
ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
- 110, ds_cstr(&match), action);
+ 110, ds_cstr(&match), ds_cstr(&action));
}
- free(ip_address);
ds_destroy(&match);
- free(action);
- }
+ ds_destroy(&action);
+ }
}
}
@@ -5165,7 +5491,8 @@ static void
build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
struct hmap *port_groups, struct hmap *lflows,
struct hmap *mcgroups, struct hmap *igmp_groups,
- struct shash *meter_groups)
+ struct shash *meter_groups,
+ struct hmap *lbs)
{
/* This flow table structure is documented in ovn-northd(8), so please
* update ovn-northd.8.xml if you change anything. */
@@ -5187,7 +5514,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap
*ports,
build_acls(od, lflows, port_groups);
build_qos(od, lflows);
build_lb(od, lflows);
- build_stateful(od, lflows);
+ build_stateful(od, lflows, lbs);
}
/* Logical switch ingress table 0: Admission control framework (priority
@@ -5389,6 +5716,47 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap
*ports,
ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
}
+ /* Ingress table 11: ARP/ND responder for service monitor source ip.
+ * (priority 110)*/
+ struct ovn_lb *lb;
+ HMAP_FOR_EACH (lb, hmap_node, lbs) {
+ for (size_t i = 0; i < lb->n_vips; i++) {
+ if (!lb->vips[i].health_check) {
+ continue;
+ }
+
+ for (size_t j = 0; j < lb->vips[i].n_backends; j++) {
+ if (!lb->vips[i].backends[j].op ||
+ !lb->vips[i].backends[j].svc_mon_src_ip) {
+ continue;
+ }
+
+ ds_clear(&match);
+ ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
+ lb->vips[i].backends[j].svc_mon_src_ip);
+ ds_clear(&actions);
+ ds_put_format(&actions,
+ "eth.dst = eth.src; "
+ "eth.src = "ETH_ADDR_FMT"; "
+ "arp.op = 2; /* ARP reply */ "
+ "arp.tha = arp.sha; "
+ "arp.sha = "ETH_ADDR_FMT"; "
+ "arp.tpa = arp.spa; "
+ "arp.spa = %s; "
+ "outport = inport; "
+ "flags.loopback = 1; "
+ "output;",
+ ETH_ADDR_ARGS(svc_monitor_mac),
+ ETH_ADDR_ARGS(svc_monitor_mac),
+ lb->vips[i].backends[j].svc_mon_src_ip);
+ ovn_lflow_add(lflows, lb->vips[i].backends[j].op->od,
+ S_SWITCH_IN_ARP_ND_RSP, 110,
+ ds_cstr(&match), ds_cstr(&actions));
+ }
+ }
+ }
+
+
/* Logical switch ingress table 12 and 13: DHCP options and response
* priority 100 flows. */
HMAP_FOR_EACH (op, key_node, ports) {
@@ -8754,12 +9122,13 @@ static void
build_lflows(struct northd_context *ctx, struct hmap *datapaths,
struct hmap *ports, struct hmap *port_groups,
struct hmap *mcgroups, struct hmap *igmp_groups,
- struct shash *meter_groups)
+ struct shash *meter_groups,
+ struct hmap *lbs)
{
struct hmap lflows = HMAP_INITIALIZER(&lflows);
build_lswitch_flows(datapaths, ports, port_groups, &lflows, mcgroups,
- igmp_groups, meter_groups);
+ igmp_groups, meter_groups, lbs);
build_lrouter_flows(datapaths, ports, &lflows, meter_groups);
/* Push changes to the Logical_Flow table to database. */
@@ -9476,9 +9845,11 @@ ovnnb_db_run(struct northd_context *ctx,
struct hmap mcast_groups;
struct hmap igmp_groups;
struct shash meter_groups = SHASH_INITIALIZER(&meter_groups);
+ struct hmap lbs;
build_datapaths(ctx, datapaths, lr_list);
build_ports(ctx, sbrec_chassis_by_name, datapaths, ports);
+ build_ovn_lbs(ctx, ports, &lbs);
build_ipam(datapaths, ports);
build_port_group_lswitches(ctx, &port_groups, ports);
build_lrouter_groups(ports, lr_list);
@@ -9486,12 +9857,14 @@ ovnnb_db_run(struct northd_context *ctx,
build_mcast_groups(ctx, datapaths, ports, &mcast_groups, &igmp_groups);
build_meter_groups(ctx, &meter_groups);
build_lflows(ctx, datapaths, ports, &port_groups, &mcast_groups,
- &igmp_groups, &meter_groups);
+ &igmp_groups, &meter_groups, &lbs);
sync_address_sets(ctx);
sync_port_groups(ctx);
sync_meters(ctx);
sync_dns_entries(ctx, datapaths);
+ destroy_ovn_lbs(&lbs);
+ hmap_destroy(&lbs);
struct ovn_igmp_group *igmp_group, *next_igmp_group;
@@ -9540,16 +9913,39 @@ ovnnb_db_run(struct northd_context *ctx,
&addr.ea[0], &addr.ea[1], &addr.ea[2])) {
mac_prefix = addr;
}
- } else {
- struct smap options;
+ }
+ const char *svc_monitor_mac_str = smap_get(&nb->options,
+ "svc_monitor_mac");
+ if (svc_monitor_mac_str) {
+ struct eth_addr addr;
+
+ memset(&addr, 0, sizeof addr);
+ if (eth_addr_from_string(svc_monitor_mac_str, &addr)) {
+ svc_monitor_mac = addr;
+ }
+ }
+
+ if (!mac_addr_prefix || !svc_monitor_mac_str) {
+ struct smap options;
smap_clone(&options, &nb->options);
- eth_addr_random(&mac_prefix);
- memset(&mac_prefix.ea[3], 0, 3);
- smap_add_format(&options, "mac_prefix",
- "%02"PRIx8":%02"PRIx8":%02"PRIx8,
- mac_prefix.ea[0], mac_prefix.ea[1], mac_prefix.ea[2]);
+ if (!mac_addr_prefix) {
+ eth_addr_random(&mac_prefix);
+ memset(&mac_prefix.ea[3], 0, 3);
+
+ smap_add_format(&options, "mac_prefix",
+ "%02"PRIx8":%02"PRIx8":%02"PRIx8,
+ mac_prefix.ea[0], mac_prefix.ea[1],
+ mac_prefix.ea[2]);
+ }
+
+ if (!svc_monitor_mac_str) {
+ eth_addr_random(&svc_monitor_mac);
+ smap_add_format(&options, "svc_monitor_mac", ETH_ADDR_FMT,
+ ETH_ADDR_ARGS(svc_monitor_mac));
+ }
+
nbrec_nb_global_verify_options(nb);
nbrec_nb_global_set_options(nb, &options);
@@ -10349,6 +10745,25 @@ main(int argc, char *argv[])
&sbrec_ip_multicast_col_query_interval);
add_column_noalert(ovnsb_idl_loop.idl,
&sbrec_ip_multicast_col_query_max_resp);
+ ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_service_monitor);
+ add_column_noalert(ovnsb_idl_loop.idl,
+ &sbrec_service_monitor_col_ip);
+ add_column_noalert(ovnsb_idl_loop.idl,
+ &sbrec_service_monitor_col_logical_port);
+ add_column_noalert(ovnsb_idl_loop.idl,
+ &sbrec_service_monitor_col_port);
+ add_column_noalert(ovnsb_idl_loop.idl,
+ &sbrec_service_monitor_col_options);
+ ovsdb_idl_add_column(ovnsb_idl_loop.idl,
+ &sbrec_service_monitor_col_status);
+ add_column_noalert(ovnsb_idl_loop.idl,
+ &sbrec_service_monitor_col_protocol);
+ add_column_noalert(ovnsb_idl_loop.idl,
+ &sbrec_service_monitor_col_src_mac);
+ add_column_noalert(ovnsb_idl_loop.idl,
+ &sbrec_service_monitor_col_src_ip);
+ add_column_noalert(ovnsb_idl_loop.idl,
+ &sbrec_service_monitor_col_external_ids);
struct ovsdb_idl_index *sbrec_chassis_by_name
= chassis_index_create(ovnsb_idl_loop.idl);
diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
index 2c87cbba7..4477fc08f 100644
--- a/ovn-nb.ovsschema
+++ b/ovn-nb.ovsschema
@@ -1,7 +1,7 @@
{
"name": "OVN_Northbound",
- "version": "5.16.0",
- "cksum": "923459061 23095",
+ "version": "5.17.0",
+ "cksum": "1015672974 24054",
"tables": {
"NB_Global": {
"columns": {
@@ -152,10 +152,31 @@
"type": {"key": {"type": "string",
"enum": ["set", ["tcp", "udp"]]},
"min": 0, "max": 1}},
+ "health_check": {"type": {
+ "key": {"type": "uuid",
+ "refTable": "Load_Balancer_Health_Check",
+ "refType": "strong"},
+ "min": 0,
+ "max": "unlimited"}},
+ "ip_port_mappings": {
+ "type": {"key": "string", "value": "string",
+ "min": 0, "max": "unlimited"}},
"external_ids": {
"type": {"key": "string", "value": "string",
"min": 0, "max": "unlimited"}}},
"isRoot": true},
+ "Load_Balancer_Health_Check": {
+ "columns": {
+ "vip": {"type": "string"},
+ "options": {
+ "type": {"key": "string",
+ "value": "string",
+ "min": 0,
+ "max": "unlimited"}},
+ "external_ids": {
+ "type": {"key": "string", "value": "string",
+ "min": 0, "max": "unlimited"}}},
+ "isRoot": false},
"ACL": {
"columns": {
"name": {"type": {"key": {"type": "string",
diff --git a/ovn-nb.xml b/ovn-nb.xml
index 899089422..70e998f3e 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -1297,6 +1297,74 @@
</p>
</column>
+ <column name="health_check">
+ Load balancer health checks associated with this load balancer.
+ If health check is desired for a vip's endpoints defined in
+ the <ref column="vips" table="Load_Balancer" db="OVN_Northbound"/>
+ column, then a row in the table
+ <ref table="Load_Balancer_Health_Check" db="OVN_Northbound"/> should
+ be created and referenced here and L4 port should be defined
+ for the vip and its endpoints. Health checks are supported only
+ for IPv4 load balancers.
+ </column>
+
+ <column name="ip_port_mappings">
+ <p>
+ This column is used if load balancer health checks are enabled.
+ This keeps a mapping of endpoint IP to the logical port name.
+ The source ip to be used for health checks is also expected to be
+ defined. The key of the mapping is the endpoint IP and the value
+ is in the format : <code>port_name:SRC_IP</code>
+ </p>
+
+ <p>
+ Eg. If there is a VIP entry:
+ <code>"10.0.0.10:80=10.0.0.4:8080,20.0.0.4:8080"</code>,
+ then the IP to port mappings should be defined as:
+ <code>"10.0.0.4"="sw0-p1:10.0.0.2"</code> and
+ <code>"20.0.0.4"="sw1-p1:20.0.0.2"</code>. <code>10.0.0.2</code>
+ and <code>20.0.0.2</code> will be used by <code>ovn-controller</code>
+ as source ip when it sends out health check packets.
+ </p>
+ </column>
+
+ <group title="Common Columns">
+ <column name="external_ids">
+ See <em>External IDs</em> at the beginning of this document.
+ </column>
+ </group>
+ </table>
+
+ <table name="Load_Balancer_Health_Check" title="load balancer">
+ <p>
+ Each row represents one load balancer health check. Health checks
+ are supported for IPv4 load balancers only.
+ </p>
+
+ <column name="vip">
+ <code>vip</code> whose endpoints should be monitored for health check.
+ </column>
+
+ <group title="Health check options">
+ <column name="options" key="interval" type='{"type": "integer"}'>
+ The interval, in seconds, between health checks.
+ </column>
+
+ <column name="options" key="timeout" type='{"type": "integer"}'>
+ The time, in seconds, after which a health check times out.
+ </column>
+
+ <column name="options" key="success_count" type='{"type": "integer"}'>
+ The number of successful checks after which the endpoint is
+ considered online.
+ </column>
+
+ <column name="options" key="failure_count" type='{"type": "integer"}'>
+ The number of failure checks after which the endpoint is considered
+ offline.
+ </column>
+ </group>
+
<group title="Common Columns">
<column name="external_ids">
See <em>External IDs</em> at the beginning of this document.
diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
index 5c013b17e..56af0ed3e 100644
--- a/ovn-sb.ovsschema
+++ b/ovn-sb.ovsschema
@@ -1,7 +1,7 @@
{
"name": "OVN_Southbound",
- "version": "2.5.0",
- "cksum": "1257419092 20387",
+ "version": "2.6.0",
+ "cksum": "4271405686 21646",
"tables": {
"SB_Global": {
"columns": {
@@ -403,4 +403,31 @@
"refType": "weak"},
"min": 0, "max": "unlimited"}}},
"indexes": [["address", "datapath", "chassis"]],
- "isRoot": true}}}
+ "isRoot": true},
+ "Service_Monitor": {
+ "columns": {
+ "ip": {"type": "string"},
+ "protocol": {
+ "type": {"key": {"type": "string",
+ "enum": ["set", ["tcp", "udp"]]},
+ "min": 0, "max": 1}},
+ "port": {"type": {"key": {"type": "integer",
+ "minInteger": 0,
+ "maxInteger": 32767}}},
+ "logical_port": {"type": "string"},
+ "src_mac": {"type": "string"},
+ "src_ip": {"type": "string"},
+ "status": {
+ "type": {"key": {"type": "string",
+ "enum": ["set", ["online", "offline", "error"]]},
+ "min": 0, "max": 1}},
+ "options": {
+ "type": {"key": "string", "value": "string",
+ "min": 0, "max": "unlimited"}},
+ "external_ids": {
+ "type": {"key": "string", "value": "string",
+ "min": 0, "max": "unlimited"}}},
+ "indexes": [["logical_port", "ip", "port", "protocol"]],
+ "isRoot": true}
+ }
+}
diff --git a/ovn-sb.xml b/ovn-sb.xml
index e5fb51a9d..335f9031b 100644
--- a/ovn-sb.xml
+++ b/ovn-sb.xml
@@ -3743,4 +3743,89 @@ tcp.flags = RST;
The destination port bindings for this IGMP group.
</column>
</table>
+
+ <table name="Service_Monitor">
+ <p>
+ This table monitors a service for its liveness. The service
+ can be an IPv4 tcp or a udp service. <code>ovn-controller</code>
+ periodically sends out service monitor packets and updates the
+ status of the service. Service monitoring for IPv6 services is
+ not supported.
+ </p>
+
+ <column name="ip">
+ IP of the service to be monitored. Only IPv4 is supported.
+ </column>
+
+ <column name="protocol">
+ The protocol of the service. It can be either <code>tcp</code> or
+ <code>udp</code>.
+ </column>
+
+ <column name="port">
+ The tcp or udp port of the service.
+ </column>
+
+ <column name="logical_port">
+ The VIF of logical port on which the service is running. The
+ <code>ovn-controller</code> which binds this <code>logical_port</code>
+ monitors the service by sending periodic monitor packets.
+ </column>
+
+ <column name="status">
+ <p>
+ The <code>ovn-controller</code> which binds the
+ <code>logical_port</code> updates the status to <code>online</code>,
+ <code>offline</code>, or <code>error</code>.
+ </p>
+
+ <p>
+ For tcp service, <code>ovn-controller</code> sends a
+ <code>TCP SYN</code> packet to the service and expects a
+ <code>TCP ACK</code> response to consider the service to be
+ <code>online</code>.
+ </p>
+
+ <p>
+ For udp service, <code>ovn-controller</code> sends a <code>udp</code>
+ packet to the service and doesn't expect any reply. If it receives
+ ICMP reply, then it considers the service to be <code>offline</code>.
+ </p>
+ </column>
+
+ <column name="src_mac">
+ Source Ethernet address to use in the service monitor packet.
+ </column>
+
+ <column name="src_ip">
+ Source IPv4 address to use in the service monitor packet.
+ </column>
+
+ <group title="Service monitor options">
+ <column name="options" key="interval" type='{"type": "integer"}'>
+ The interval, in seconds, between service monitor checks.
+ </column>
+
+ <column name="options" key="timeout" type='{"type": "integer"}'>
+ The time, in seconds, after which the service monitor check times
+ out.
+ </column>
+
+ <column name="options" key="success_count" type='{"type": "integer"}'>
+ The number of successful checks after which the service is
+ considered <code>online</code>.
+ </column>
+
+ <column name="options" key="failure_count" type='{"type": "integer"}'>
+ The number of failure checks after which the service is considered
+ <code>offline</code>.
+ </column>
+ </group>
+
+ <group title="Common Columns">
+ <column name="external_ids">
+ See <em>External IDs</em> at the beginning of this document.
+ </column>
+ </group>
+ </table>
</database>
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 42033d589..e9a1fe0a3 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -966,3 +966,218 @@ OVS_WAIT_UNTIL([ovn-sbctl get Port_Binding ${uuid}
options:redirect-type], [0],
])
AT_CLEANUP
+
+AT_SETUP([ovn -- check Load balancer health check and Service Monitor sync])
+AT_SKIP_IF([test $HAVE_PYTHON = no])
+ovn_start
+
+ovn-nbctl lb-add lb1 10.0.0.10:80 10.0.0.3:80,20.0.0.3:80
+
+ovn-nbctl --wait=sb set load_balancer . ip_port_mappings:10.0.0.3=sw0-p1
+ovn-nbctl --wait=sb set load_balancer . ip_port_mappings:20.0.0.3=sw1-p1
+
+OVS_WAIT_UNTIL([test 0 = `ovn-sbctl list service_monitor | wc -l`])
+
+ovn-nbctl --wait=sb -- --id=@hc create \
+Load_Balancer_Health_Check vip="10.0.0.10\:80" -- add Load_Balancer . \
+health_check @hc
+
+OVS_WAIT_UNTIL([test 0 = `ovn-sbctl list service_monitor | wc -l`])
+
+# create logical switches and ports
+ovn-nbctl ls-add sw0
+ovn-nbctl --wait=sb lsp-add sw0 sw0-p1 -- lsp-set-addresses sw0-p1 \
+"00:00:00:00:00:03 10.0.0.3"
+
+OVS_WAIT_UNTIL([test 0 = `ovn-sbctl --bare --columns _uuid find \
+service_monitor | wc -l`])
+
+ovn-nbctl ls-add sw1
+ovn-nbctl --wait=sb lsp-add sw1 sw1-p1 -- lsp-set-addresses sw1-p1 \
+"02:00:00:00:00:03 20.0.0.3"
+
+OVS_WAIT_UNTIL([test 0 = `ovn-sbctl --bare --columns _uuid find \
+service_monitor | sed '/^$/d' | wc -l`])
+
+ovn-nbctl --wait=sb set load_balancer .
ip_port_mappings:10.0.0.3=sw0-p1:10.0.0.2
+OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns _uuid find \
+service_monitor | wc -l`])
+
+ovn-nbctl --wait=sb set load_balancer .
ip_port_mappings:20.0.0.3=sw1-p1:20.0.0.2
+
+OVS_WAIT_UNTIL([test 2 = `ovn-sbctl --bare --columns _uuid find \
+service_monitor | sed '/^$/d' | wc -l`])
+
+ovn-nbctl --wait=sb ls-lb-add sw0 lb1
+
+ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
+AT_CHECK([cat lflows.txt], [0], [dnl
+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10
&& tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
+])
+
+# Delete the Load_Balancer_Health_Check
+ovn-nbctl --wait=sb clear load_balancer . health_check
+OVS_WAIT_UNTIL([test 0 = `ovn-sbctl list service_monitor | wc -l`])
+
+ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
+AT_CHECK([cat lflows.txt], [0], [dnl
+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10
&& tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
+])
+
+# Create the Load_Balancer_Health_Check again.
+ovn-nbctl --wait=sb -- --id=@hc create \
+Load_Balancer_Health_Check vip="10.0.0.10\:80" -- add Load_Balancer . \
+health_check @hc
+
+OVS_WAIT_UNTIL([test 2 = `ovn-sbctl --bare --columns _uuid find \
+service_monitor | sed '/^$/d' | wc -l`])
+
+ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
+AT_CHECK([cat lflows.txt], [0], [dnl
+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10
&& tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
+])
+
+# Get the uuid of both the service_monitor
+sm_sw0_p1=`ovn-sbctl --bare --columns _uuid find service_monitor
logical_port=sw0-p1`
+sm_sw1_p1=`ovn-sbctl --bare --columns _uuid find service_monitor
logical_port=sw1-p1`
+
+# Set the service monitor for sw1-p1 to offline
+ovn-sbctl set service_monitor $sm_sw1_p1 status=offline
+
+OVS_WAIT_UNTIL([
+ status=`ovn-sbctl --bare --columns status find service_monitor
logical_port=sw1-p1`
+ test "$status" = "offline"])
+
+ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
+AT_CHECK([cat lflows.txt], [0], [dnl
+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10
&& tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
+])
+
+# Set the service monitor for sw0-p1 to offline
+ovn-sbctl set service_monitor $sm_sw0_p1 status=offline
+
+OVS_WAIT_UNTIL([
+ status=`ovn-sbctl --bare --columns status find service_monitor
logical_port=sw0-p1`
+ test "$status" = "offline"])
+
+ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
+AT_CHECK([cat lflows.txt], [0], [dnl
+])
+
+ovn-sbctl dump-flows sw0 | grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" \
+| grep priority=120 > lflows.txt
+AT_CHECK([cat lflows.txt], [0], [dnl
+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10
&& tcp.dst == 80), action=(drop;)
+])
+
+# Set the service monitor for sw0-p1 and sw1-p1 to online
+ovn-sbctl set service_monitor $sm_sw0_p1 status=online
+ovn-sbctl set service_monitor $sm_sw1_p1 status=online
+
+OVS_WAIT_UNTIL([
+ status=`ovn-sbctl --bare --columns status find service_monitor
logical_port=sw1-p1`
+ test "$status" = "online"])
+
+ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
+AT_CHECK([cat lflows.txt], [0], [dnl
+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10
&& tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
+])
+
+# Set the service monitor for sw1-p1 to error
+ovn-sbctl set service_monitor $sm_sw1_p1 status=error
+OVS_WAIT_UNTIL([
+ status=`ovn-sbctl --bare --columns status find service_monitor
logical_port=sw1-p1`
+ test "$status" = "error"])
+
+ovn-sbctl dump-flows sw0 | grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" \
+| grep priority=120 > lflows.txt
+AT_CHECK([cat lflows.txt], [0], [dnl
+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10
&& tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
+])
+
+# Add one more vip to lb1
+
+ovn-nbctl set load_balancer . vip:"10.0.0.40\:1000"="10.0.0.3:1000,20.0.0.3:80"
+
+# create health_check for new vip - 10.0.0.40
+ovn-nbctl --wait=sb -- --id=@hc create \
+Load_Balancer_Health_Check vip="10.0.0.40\:1000" -- add Load_Balancer . \
+health_check @hc
+
+# There should be totally 3 rows in service_monitor for -
+# * 10.0.0.3:80
+# * 10.0.0.3:1000
+# * 20.0.0.3:80
+
+OVS_WAIT_UNTIL([test 3 = `ovn-sbctl --bare --columns _uuid find \
+service_monitor | sed '/^$/d' | wc -l`])
+
+# There should be 2 rows with logical_port=sw0-p1
+OVS_WAIT_UNTIL([test 2 = `ovn-sbctl --bare --columns _uuid find \
+service_monitor logical_port=sw0-p1 | sed '/^$/d' | wc -l`])
+
+# There should be 1 row1 with port=1000
+OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns _uuid find \
+service_monitor port=1000 | sed '/^$/d' | wc -l`])
+
+ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
+AT_CHECK([cat lflows.txt], [0], [dnl
+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10
&& tcp.dst == 80), action=(ct_lb(10.0.0.3:80);)
+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40
&& tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000);)
+])
+
+# Set the service monitor for sw1-p1 to online
+ovn-sbctl set service_monitor $sm_sw1_p1 status=online
+
+OVS_WAIT_UNTIL([
+ status=`ovn-sbctl --bare --columns status find service_monitor
logical_port=sw1-p1`
+ test "$status" = "online"])
+
+ovn-sbctl dump-flows sw0 | grep ct_lb | grep priority=120 > lflows.txt
+AT_CHECK([cat lflows.txt], [0], [dnl
+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10
&& tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40
&& tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000,20.0.0.3:80);)
+])
+
+# Associate lb1 to sw1
+ovn-nbctl --wait=sb ls-lb-add sw1 lb1
+ovn-sbctl dump-flows sw1 | grep ct_lb | grep priority=120 > lflows.txt
+AT_CHECK([cat lflows.txt], [0], [dnl
+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10
&& tcp.dst == 80), action=(ct_lb(10.0.0.3:80,20.0.0.3:80);)
+ table=10(ls_in_stateful ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.40
&& tcp.dst == 1000), action=(ct_lb(10.0.0.3:1000,20.0.0.3:80);)
+])
+
+# Now create lb2 same as lb1 but udp protocol.
+ovn-nbctl lb-add lb2 10.0.0.10:80 10.0.0.3:80,20.0.0.3:80 udp
+lb2_uuid=`ovn-nbctl lb-list | grep udp | awk '{print $1}'`
+ovn-nbctl --wait=sb set load_balancer $lb2_uuid
ip_port_mappings:10.0.0.3=sw0-p1:10.0.0.2
+ovn-nbctl --wait=sb set load_balancer $lb2_uuid
ip_port_mappings:20.0.0.3=sw1-p1:20.0.0.2
+
+ovn-nbctl -- --id=@hc create Load_Balancer_Health_Check vip="10.0.0.10\:80" --
add Load_Balancer $lb2_uuid health_check @hc
+
+ovn-nbctl ls-lb-add sw0 lb2
+ovn-nbctl ls-lb-add sw1 lb2
+ovn-nbctl lr-lb-add lr0 lb2
+
+OVS_WAIT_UNTIL([test 5 = `ovn-sbctl --bare --columns _uuid find \
+service_monitor | sed '/^$/d' | wc -l`])
+
+# Change the svc_monitor_mac. This should get reflected in service_monitor
table rows.
+ovn-nbctl set NB_Global . options:svc_monitor_mac="fe:a0:65:a2:01:03"
+
+OVS_WAIT_UNTIL([test 5 = `ovn-sbctl --bare --columns src_mac find \
+service_monitor | grep "fe:a0:65:a2:01:03" | wc -l`])
+
+# Change the source ip for 10.0.0.3 backend ip in lb2
+ovn-nbctl --wait=sb set load_balancer $lb2_uuid
ip_port_mappings:10.0.0.3=sw0-p1:10.0.0.100
+
+OVS_WAIT_UNTIL([test 1 = `ovn-sbctl --bare --columns src_ip find \
+service_monitor logical_port=sw0-p1 | grep "10.0.0.100" | wc -l`])
+
+ovn-nbctl --wait=sb lb-del lb1
+OVS_WAIT_UNTIL([test 2 = `ovn-sbctl --bare --columns _uuid find
service_monitor | sed '/^$/d' | wc -l`])
+
+ovn-nbctl --wait=sb lb-del lb2
+OVS_WAIT_UNTIL([test 0 = `ovn-sbctl list service_monitor | wc -l`])
+
+AT_CLEANUP