This commit fixes the build_distr_lrouter_nat_flows_for_lb function to include one stateless NAT flow entry for each DGP (distributed gateway port) in use. Since support for multiple gateway ports per logical router has been added, the LR NAT pipeline needs a specific entry for each attached DGP. Otherwise, ingress traffic is only redirected when the incoming LRP matches the port used in the is_chassis_resident match of the single flow that was generated (for the first DGP only).
Considering that DNAT rules for DGPs were implemented with the need to configure the DGP-related gateway-port column, the load-balancer NAT rule configuration can use a similar idea. In this case, we don't know the LRP responsible for the incoming traffic, and therefore we need to automatically apply a stateless NAT rule to the load-balancer on all DGPs to allow inbound traffic. After applying this patch, the incoming and/or outgoing traffic can pass through any chassis where the DGP resides without having problems with CT state. Fixes: 15348b7b806f ("ovn-northd: Multiple distributed gateway port support.") Signed-off-by: Roberto Bartzen Acosta <roberto.aco...@luizalabs.com> --- northd/en-lr-stateful.c | 12 ----- northd/northd.c | 67 ++++++++++++++++++------ tests/ovn-northd.at | 111 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+), 27 deletions(-) diff --git a/northd/en-lr-stateful.c b/northd/en-lr-stateful.c index baf1bd2f8..f09691af6 100644 --- a/northd/en-lr-stateful.c +++ b/northd/en-lr-stateful.c @@ -516,18 +516,6 @@ lr_stateful_record_create(struct lr_stateful_table *table, table->array[od->index] = lr_stateful_rec; - /* Load balancers are not supported (yet) if a logical router has multiple - * distributed gateway port. Log a warning. */ - if (lr_stateful_rec->has_lb_vip && lr_has_multiple_gw_ports(od)) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); - VLOG_WARN_RL(&rl, "Load-balancers are configured on logical " - "router %s, which has %"PRIuSIZE" distributed " - "gateway ports. 
Load-balancer is not supported " - "yet when there is more than one distributed " - "gateway port on the router.", - od->nbr->name, od->n_l3dgw_ports); - } - return lr_stateful_rec; } diff --git a/northd/northd.c b/northd/northd.c index 6898daa00..9d22698c9 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -11026,24 +11026,25 @@ static void build_distr_lrouter_nat_flows_for_lb(struct lrouter_nat_lb_flows_ctx *ctx, enum lrouter_nat_lb_flow_type type, struct ovn_datapath *od, - struct lflow_ref *lflow_ref) + struct lflow_ref *lflow_ref, + struct ovn_port *dgp) { - struct ovn_port *dgp = od->l3dgw_ports[0]; - - const char *undnat_action; + struct ds undnat_action = DS_EMPTY_INITIALIZER; + struct ds dnat_action = DS_EMPTY_INITIALIZER; + struct ds snat_action = DS_EMPTY_INITIALIZER; switch (type) { case LROUTER_NAT_LB_FLOW_FORCE_SNAT: - undnat_action = "flags.force_snat_for_lb = 1; next;"; + ds_put_format(&undnat_action, "flags.force_snat_for_lb = 1; next;"); break; case LROUTER_NAT_LB_FLOW_SKIP_SNAT: - undnat_action = "flags.skip_snat_for_lb = 1; next;"; + ds_put_format(&undnat_action, "flags.skip_snat_for_lb = 1; next;"); break; case LROUTER_NAT_LB_FLOW_NORMAL: case LROUTER_NAT_LB_FLOW_MAX: - undnat_action = lrouter_use_common_zone(od) - ? "ct_dnat_in_czone;" - : "ct_dnat;"; + ds_put_format(&undnat_action, "%s", + lrouter_use_common_zone(od) ? "ct_dnat_in_czone;" + : "ct_dnat;"); break; } @@ -11051,6 +11052,31 @@ build_distr_lrouter_nat_flows_for_lb(struct lrouter_nat_lb_flows_ctx *ctx, size_t new_match_len = ctx->new_match->length; size_t undnat_match_len = ctx->undnat_match->length; + /* Change the logic to create LB NAT rules when we are using DGPs. + * 1. Remove the ct action from the lr_out_undenat NAT rule. + * 2. Add the LB backend IPs as a destination action of the lr_in_dnat + * NAT rule with cumulative effect because any backend dst IP used in + * the action list will redirect the packet to the ct_lb pipeline. + * 3. 
Add a new lr_out_snat NAT rule with the LB VIP as source IP + * action to perform the NAT stateless pipeline completely. + */ + if (od->n_l3dgw_ports > 1) { + ds_clear(&undnat_action); + ds_put_format(&undnat_action, "next;"); + + for (size_t i = 0; i < ctx->lb_vip->n_backends; i++) { + struct ovn_lb_backend *backend = &ctx->lb_vip->backends[i]; + bool ipv6 = !IN6_IS_ADDR_V4MAPPED(&backend->ip); + ds_put_format(&dnat_action, "%s.dst=%s;", ipv6 ? "ip6" : "ip4", + backend->ip_str); + } + + ds_put_format(&snat_action, "%s.src=%s; next;", + ctx->lb_vip->address_family == AF_INET6 ? "ip6" : "ip4", + ctx->lb_vip->vip_str); + + } + ds_put_format(&dnat_action, "%s", ctx->new_action[type]); const char *meter = NULL; @@ -11060,11 +11086,11 @@ build_distr_lrouter_nat_flows_for_lb(struct lrouter_nat_lb_flows_ctx *ctx, if (ctx->lb_vip->n_backends || !ctx->lb_vip->empty_backend_rej) { ds_put_format(ctx->new_match, " && is_chassis_resident(%s)", - od->l3dgw_ports[0]->cr_port->json_key); + dgp->cr_port->json_key); } ovn_lflow_add_with_hint__(ctx->lflows, od, S_ROUTER_IN_DNAT, ctx->prio, - ds_cstr(ctx->new_match), ctx->new_action[type], + ds_cstr(ctx->new_match), ds_cstr(&dnat_action), NULL, meter, &ctx->lb->nlb->header_, lflow_ref); @@ -11093,9 +11119,17 @@ build_distr_lrouter_nat_flows_for_lb(struct lrouter_nat_lb_flows_ctx *ctx, " && is_chassis_resident(%s)", dgp->json_key, dgp->json_key, dgp->cr_port->json_key); ovn_lflow_add_with_hint(ctx->lflows, od, S_ROUTER_OUT_UNDNAT, 120, - ds_cstr(ctx->undnat_match), undnat_action, - &ctx->lb->nlb->header_, + ds_cstr(ctx->undnat_match), + ds_cstr(&undnat_action), &ctx->lb->nlb->header_, lflow_ref); + + if (od->n_l3dgw_ports > 1) { + ovn_lflow_add_with_hint(ctx->lflows, od, S_ROUTER_OUT_SNAT, 120, + ds_cstr(ctx->undnat_match), + ds_cstr(&snat_action), &ctx->lb->nlb->header_, + lflow_ref); + } + ds_truncate(ctx->undnat_match, undnat_match_len); } @@ -11263,8 +11297,11 @@ build_lrouter_nat_flows_for_lb( if (!od->n_l3dgw_ports) { 
bitmap_set1(gw_dp_bitmap[type], index); } else { - build_distr_lrouter_nat_flows_for_lb(&ctx, type, od, - lb_dps->lflow_ref); + for (size_t i = 0; i < od->n_l3dgw_ports; i++) { + struct ovn_port *dgp = od->l3dgw_ports[i]; + build_distr_lrouter_nat_flows_for_lb(&ctx, type, od, + lb_dps->lflow_ref, dgp); + } } if (lb->affinity_timeout) { diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index a389d1988..bb508f9e6 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -12721,3 +12721,114 @@ AT_CHECK([ovn-sbctl dump-flows lr | grep lr_in_dnat | ovn_strip_lflows], [0], [d AT_CLEANUP ]) + +OVN_FOR_EACH_NORTHD_NO_HV_PARALLELIZATION([ +AT_SETUP([Load balancer with Distributed Gateway Ports (LB + DGP + NAT Stateless)]) +ovn_start + +check ovn-nbctl ls-add public +check ovn-nbctl lr-add lr1 + +# lr1 DGP ts1 +check ovn-nbctl ls-add ts1 +check ovn-nbctl lrp-add lr1 lr1-ts1 00:00:01:02:03:04 172.16.10.1/24 +check ovn-nbctl lrp-set-gateway-chassis lr1-ts1 chassis-2 + +# lr1 DGP ts2 +check ovn-nbctl ls-add ts2 +check ovn-nbctl lrp-add lr1 lr1-ts2 00:00:01:02:03:05 172.16.20.1/24 +check ovn-nbctl lrp-set-gateway-chassis lr1-ts2 chassis-3 + +# lr1 DGP public +check ovn-nbctl lrp-add lr1 lr1_public 00:de:ad:ff:00:01 173.16.0.1/16 +check ovn-nbctl lrp-add lr1 lr1_s1 00:de:ad:fe:00:02 172.16.0.1/24 +check ovn-nbctl lrp-set-gateway-chassis lr1_public chassis-1 + +check ovn-nbctl ls-add s1 +# s1 - lr1 +check ovn-nbctl lsp-add s1 s1_lr1 +check ovn-nbctl lsp-set-type s1_lr1 router +check ovn-nbctl lsp-set-addresses s1_lr1 "00:de:ad:fe:00:02 172.16.0.1" +check ovn-nbctl lsp-set-options s1_lr1 router-port=lr1_s1 + +# s1 - backend vm1 +check ovn-nbctl lsp-add s1 vm1 +check ovn-nbctl lsp-set-addresses vm1 "00:de:ad:01:00:01 172.16.0.101" + +# s1 - backend vm2 +check ovn-nbctl lsp-add s1 vm2 +check ovn-nbctl lsp-set-addresses vm2 "00:de:ad:01:00:02 172.16.0.102" + +# s1 - backend vm3 +check ovn-nbctl lsp-add s1 vm3 +check ovn-nbctl lsp-set-addresses vm3 "00:de:ad:01:00:03 
172.16.0.103" + +# Add the lr1 DGP ts1 to the public switch +check ovn-nbctl lsp-add public public_lr1_ts1 +check ovn-nbctl lsp-set-type public_lr1_ts1 router +check ovn-nbctl lsp-set-addresses public_lr1_ts1 router +check ovn-nbctl lsp-set-options public_lr1_ts1 router-port=lr1-ts1 nat-addresses=router + +# Add the lr1 DGP ts2 to the public switch +check ovn-nbctl lsp-add public public_lr1_ts2 +check ovn-nbctl lsp-set-type public_lr1_ts2 router +check ovn-nbctl lsp-set-addresses public_lr1_ts2 router +check ovn-nbctl lsp-set-options public_lr1_ts2 router-port=lr1-ts2 nat-addresses=router + +# Add the lr1 DGP public to the public switch +check ovn-nbctl lsp-add public public_lr1 +check ovn-nbctl lsp-set-type public_lr1 router +check ovn-nbctl lsp-set-addresses public_lr1 router +check ovn-nbctl lsp-set-options public_lr1 router-port=lr1_public nat-addresses=router + +# Create the Load Balancer lb1 +check ovn-nbctl --wait=sb lb-add lb1 "30.0.0.1" "172.16.0.103,172.16.0.102,172.16.0.101" + +# Associate load balancer to s1 +check ovn-nbctl ls-lb-add s1 lb1 +check ovn-nbctl --wait=sb sync + +ovn-sbctl dump-flows s1 > s1flows +AT_CAPTURE_FILE([s1flows]) + +AT_CHECK([grep "ls_in_pre_stateful" s1flows | ovn_strip_lflows | grep "30.0.0.1"], [0], [dnl + table=??(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 30.0.0.1), action=(reg1 = 30.0.0.1; ct_lb_mark;) +]) +AT_CHECK([grep "ls_in_lb" s1flows | ovn_strip_lflows | grep "30.0.0.1"], [0], [dnl + table=??(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 30.0.0.1), action=(reg0[[1]] = 0; ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);) +]) + +# Associate load balancer to lr1 with DGP +check ovn-nbctl lr-lb-add lr1 lb1 +check ovn-nbctl --wait=sb sync + +ovn-sbctl dump-flows lr1 > lr1flows +AT_CAPTURE_FILE([lr1flows]) + +# Check stateless NAT rules for load balancer with multiple DGP +# 1. 
Check if the backend IPs are in the ipX.dst action +AT_CHECK([grep "lr_in_dnat" lr1flows | ovn_strip_lflows | grep "30.0.0.1"], [0], [dnl + table=??(lr_in_dnat ), priority=110 , match=(ct.new && !ct.rel && ip4 && ip4.dst == 30.0.0.1 && is_chassis_resident("cr-lr1-ts1")), action=(ip4.dst=172.16.0.103;ip4.dst=172.16.0.102;ip4.dst=172.16.0.101;ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);) + table=??(lr_in_dnat ), priority=110 , match=(ct.new && !ct.rel && ip4 && ip4.dst == 30.0.0.1 && is_chassis_resident("cr-lr1-ts2")), action=(ip4.dst=172.16.0.103;ip4.dst=172.16.0.102;ip4.dst=172.16.0.101;ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);) + table=??(lr_in_dnat ), priority=110 , match=(ct.new && !ct.rel && ip4 && ip4.dst == 30.0.0.1 && is_chassis_resident("cr-lr1_public")), action=(ip4.dst=172.16.0.103;ip4.dst=172.16.0.102;ip4.dst=172.16.0.101;ct_lb_mark(backends=172.16.0.103,172.16.0.102,172.16.0.101);) +]) + +# 2. Check if the DGP ports are in the match with action next +AT_CHECK([grep "lr_out_undnat" lr1flows | ovn_strip_lflows], [0], [dnl + table=??(lr_out_undnat ), priority=0 , match=(1), action=(next;) + table=??(lr_out_undnat ), priority=120 , match=(ip4 && ((ip4.src == 172.16.0.103) || (ip4.src == 172.16.0.102) || (ip4.src == 172.16.0.101)) && (inport == "lr1-ts1" || outport == "lr1-ts1") && is_chassis_resident("cr-lr1-ts1")), action=(next;) + table=??(lr_out_undnat ), priority=120 , match=(ip4 && ((ip4.src == 172.16.0.103) || (ip4.src == 172.16.0.102) || (ip4.src == 172.16.0.101)) && (inport == "lr1-ts2" || outport == "lr1-ts2") && is_chassis_resident("cr-lr1-ts2")), action=(next;) + table=??(lr_out_undnat ), priority=120 , match=(ip4 && ((ip4.src == 172.16.0.103) || (ip4.src == 172.16.0.102) || (ip4.src == 172.16.0.101)) && (inport == "lr1_public" || outport == "lr1_public") && is_chassis_resident("cr-lr1_public")), action=(next;) +]) + +# 3. 
Check if the VIP IP is in the ipX.src action +AT_CHECK([grep "lr_out_snat" lr1flows | ovn_strip_lflows], [0], [dnl + table=??(lr_out_snat ), priority=0 , match=(1), action=(next;) + table=??(lr_out_snat ), priority=120 , match=(ip4 && ((ip4.src == 172.16.0.103) || (ip4.src == 172.16.0.102) || (ip4.src == 172.16.0.101)) && (inport == "lr1-ts1" || outport == "lr1-ts1") && is_chassis_resident("cr-lr1-ts1")), action=(ip4.src=30.0.0.1; next;) + table=??(lr_out_snat ), priority=120 , match=(ip4 && ((ip4.src == 172.16.0.103) || (ip4.src == 172.16.0.102) || (ip4.src == 172.16.0.101)) && (inport == "lr1-ts2" || outport == "lr1-ts2") && is_chassis_resident("cr-lr1-ts2")), action=(ip4.src=30.0.0.1; next;) + table=??(lr_out_snat ), priority=120 , match=(ip4 && ((ip4.src == 172.16.0.103) || (ip4.src == 172.16.0.102) || (ip4.src == 172.16.0.101)) && (inport == "lr1_public" || outport == "lr1_public") && is_chassis_resident("cr-lr1_public")), action=(ip4.src=30.0.0.1; next;) + table=??(lr_out_snat ), priority=120 , match=(nd_ns), action=(next;) +]) + +AT_CLEANUP +]) -- 2.34.1 -- _'Esta mensagem é direcionada apenas para os endereços constantes no cabeçalho inicial. Se você não está listado nos endereços constantes no cabeçalho, pedimos-lhe que desconsidere completamente o conteúdo dessa mensagem e cuja cópia, encaminhamento e/ou execução das ações citadas estão imediatamente anuladas e proibidas'._ * **'Apesar do Magazine Luiza tomar todas as precauções razoáveis para assegurar que nenhum vírus esteja presente nesse e-mail, a empresa não poderá aceitar a responsabilidade por quaisquer perdas ou danos causados por esse e-mail ou por seus anexos'.* _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev