Currently, even statless flows are subject to connection tracking when there are LB rules (for DNAT). However, if a flow needs to be subjected to LB, then it shouldn't be configured for stateless.
A stateles flows means we should not track it, and this change exempts stateless flows from being tracked regardless of whether LB rules are present or not. Signed-off-by: "venu.iyer" <venugop...@nvidia.com> Acked-by: Han Zhou <hz...@ovn.org> --- northd/northd.c | 24 +++++++++++++----- northd/ovn-northd.8.xml | 56 ++++++++++++++++++++++------------------- ovn-nb.xml | 3 +++ tests/ovn-northd.at | 48 ++++++++++++----------------------- tests/ovn.at | 4 +-- 5 files changed, 69 insertions(+), 66 deletions(-) diff --git a/northd/northd.c b/northd/northd.c index b7388afc5..da4beede6 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -137,8 +137,8 @@ enum ovn_stage { PIPELINE_STAGE(SWITCH, IN, L2_UNKNOWN, 24, "ls_in_l2_unknown") \ \ /* Logical switch egress stages. */ \ - PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \ - PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \ + PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") \ + PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 1, "ls_out_pre_lb") \ PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \ PIPELINE_STAGE(SWITCH, OUT, ACL_HINT, 3, "ls_out_acl_hint") \ PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \ @@ -210,6 +210,7 @@ enum ovn_stage { #define REGBIT_ACL_LABEL "reg0[13]" #define REGBIT_FROM_RAMP "reg0[14]" #define REGBIT_PORT_SEC_DROP "reg0[15]" +#define REGBIT_ACL_STATELESS "reg0[16]" #define REG_ORIG_DIP_IPV4 "reg1" #define REG_ORIG_DIP_IPV6 "xxreg1" @@ -271,7 +272,7 @@ enum ovn_stage { * | R0 | REGBIT_{CONNTRACK/DHCP/DNS} | | | * | | REGBIT_{HAIRPIN/HAIRPIN_REPLY} | | | * | | REGBIT_ACL_HINT_{ALLOW_NEW/ALLOW/DROP/BLOCK} | | | - * | | REGBIT_ACL_LABEL | X | | + * | | REGBIT_ACL_{LABEL/STATELESS} | X | | * +----+----------------------------------------------+ X | | * | R1 | ORIG_DIP_IPV4 (>= IN_PRE_STATEFUL) | R | | * +----+----------------------------------------------+ E | | @@ -5677,17 +5678,18 @@ build_stateless_filter(struct ovn_datapath *od, const struct nbrec_acl *acl, struct hmap *lflows) { + const char *action = REGBIT_ACL_STATELESS" = 1; next;"; if (!strcmp(acl->direction, "from-lport")) { ovn_lflow_add_with_hint(lflows, od, S_SWITCH_IN_PRE_ACL, acl->priority + OVN_ACL_PRI_OFFSET, acl->match, - "next;", + action, &acl->header_); } else { ovn_lflow_add_with_hint(lflows, od, S_SWITCH_OUT_PRE_ACL, acl->priority + OVN_ACL_PRI_OFFSET, acl->match, - "next;", + action, &acl->header_); } } @@ -5779,6 +5781,10 @@ build_pre_acls(struct ovn_datapath *od, const struct hmap *port_groups, REGBIT_CONNTRACK_DEFRAG" = 1; next;"); ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;"); + } else if (od->has_lb_vip) { + /* We'll build stateless filters if there are LB rules so that + * the stateless flows are not tracked in pre-lb. */ + build_stateless_filters(od, port_groups, lflows); } } @@ -5913,6 +5919,11 @@ build_pre_lb(struct ovn_datapath *od, const struct shash *meter_groups, S_SWITCH_IN_PRE_LB, S_SWITCH_OUT_PRE_LB, 110, lflows); } + /* Do not sent statless flows via conntrack */ + ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 110, + REGBIT_ACL_STATELESS" == 1", "next;"); + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 110, + REGBIT_ACL_STATELESS" == 1", "next;"); /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send * packet to conntrack for defragmentation and possibly for unNATting. @@ -6918,7 +6929,8 @@ build_lb_rules_pre_stateful(struct hmap *lflows, struct ovn_northd_lb *lb, } ds_put_format(action, "%s;", ct_lb_mark ? "ct_lb_mark" : "ct_lb"); - ds_put_format(match, "%s.dst == %s", ip_match, lb_vip->vip_str); + ds_put_format(match, REGBIT_CONNTRACK_NAT" == 1 && %s.dst == %s", + ip_match, lb_vip->vip_str); if (lb_vip->vip_port) { ds_put_format(match, " && %s.dst == %d", proto, lb_vip->vip_port); } diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index a70f2e678..162ec2b3b 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -440,7 +440,9 @@ priority-110 flow is added to skip over stateful ACLs. Multicast, IPv6 Neighbor Discovery and MLD traffic also skips stateful ACLs. For "allow-stateless" ACLs, a flow is added to bypass setting the hint for - connection tracker processing. + connection tracker processing when there are statelful ACLs or LB rules; + <code>REGBIT_ACL_STATELESS</code> is set for traffic matching such + flows for this purpose. </p> <p> @@ -460,8 +462,10 @@ in ingress table <code>LB</code> and <code>Stateful</code>. It contains a priority-0 flow that simply moves traffic to the next table. Moreover it contains two priority-110 flows to move multicast, IPv6 Neighbor - Discovery and MLD traffic to the next table. If load balancing rules with - virtual IP addresses (and ports) are configured in + Discovery and MLD traffic to the next table. It also contains two + priority-110 flows to move stateless traffic, i.e traffic for which + <code>REGBIT_ACL_STATELESS</code> is set, to the next table. If load + balancing rules with virtual IP addresses (and ports) are configured in <code>OVN_Northbound</code> database for a logical switch datapath, a priority-100 flow is added with the match <code>ip</code> to match on IP packets and sets the action <code>reg0[2] = 1; next;</code> to act as a @@ -1859,19 +1863,11 @@ output; </li> </ul> - <h3>Egress Table 0: Pre-LB</h3> + <h3>Egress Table 0: <code>to-lport</code> Pre-ACLs</h3> <p> - This table is similar to ingress table <code>Pre-LB</code>. It - contains a priority-0 flow that simply moves traffic to the next table. - Moreover it contains two priority-110 flows to move multicast, IPv6 - Neighbor Discovery and MLD traffic to the next table. If any load - balancing rules exist for the datapath, a priority-100 flow is added with - a match of <code>ip</code> and action of <code>reg0[2] = 1; next;</code> - to act as a hint for table <code>Pre-stateful</code> to send IP packets - to the connection tracker for packet de-fragmentation and possibly DNAT - the destination VIP to one of the selected backend for already committed - load balanced traffic. + This is similar to ingress table <code>Pre-ACLs</code> except for + <code>to-lport</code> traffic. </p> <p> @@ -1884,11 +1880,28 @@ output; db="OVN_Northbound"/> table. </p> - <h3>Egress Table 1: <code>to-lport</code> Pre-ACLs</h3> + <p> + This table also has a priority-110 flow with the match + <code>outport == <var>I</var></code> for all logical switch + datapaths to move traffic to the next table. Where <var>I</var> + is the peer of a logical router port. This flow is added to + skip the connection tracking of packets which will be entering + logical router datapath from logical switch datapath for routing. + </p> + + <h3>Egress Table 1: Pre-LB</h3> <p> - This is similar to ingress table <code>Pre-ACLs</code> except for - <code>to-lport</code> traffic. + This table is similar to ingress table <code>Pre-LB</code>. It + contains a priority-0 flow that simply moves traffic to the next table. + Moreover it contains two priority-110 flows to move multicast, IPv6 + Neighbor Discovery and MLD traffic to the next table. If any load + balancing rules exist for the datapath, a priority-100 flow is added with + a match of <code>ip</code> and action of <code>reg0[2] = 1; next;</code> + to act as a hint for table <code>Pre-stateful</code> to send IP packets + to the connection tracker for packet de-fragmentation and possibly DNAT + the destination VIP to one of the selected backend for already committed + load balanced traffic. </p> <p> @@ -1901,15 +1914,6 @@ output; db="OVN_Northbound"/> table. </p> - <p> - This table also has a priority-110 flow with the match - <code>outport == <var>I</var></code> for all logical switch - datapaths to move traffic to the next table. Where <var>I</var> - is the peer of a logical router port. This flow is added to - skip the connection tracking of packets which will be entering - logical router datapath from logical switch datapath for routing. - </p> - <h3>Egress Table 2: Pre-stateful</h3> <p> diff --git a/ovn-nb.xml b/ovn-nb.xml index f41e9d7c0..140dd9a4f 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -2063,6 +2063,9 @@ outgoing TCP traffic directed to an IP address, then you probably also want to define another rule to allow incoming TCP traffic coming from this same IP address. + In addition, traffic that matches stateless ACLs will bypass + load-balancer DNAT/un-DNAT processing. Stateful ACLs should be + used instead if the traffic is supposed to be load-balanced. </li> <li> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at index 4f399eccb..85d5acfed 100644 --- a/tests/ovn-northd.at +++ b/tests/ovn-northd.at @@ -2056,27 +2056,27 @@ check ovn-nbctl ls-lb-add sw0 lb1 check ovn-nbctl add load_balancer_group $lbg load_balancer $lb3 check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl ls-lb-add sw0 lb2 check ovn-nbctl add load_balancer_group $lbg load_balancer $lb4 check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl clear load_balancer $lb1 vips check ovn-nbctl clear load_balancer $lb3 vips check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl clear load_balancer $lb2 vips check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl clear load_balancer $lb4 vips @@ -2091,7 +2091,7 @@ check ovn-nbctl set load_balancer $lb4 vips:"10.0.0.13"="10.0.0.6" check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) # Now reverse the order of clearing the vip. @@ -2099,13 +2099,13 @@ check ovn-nbctl clear load_balancer $lb2 vips check ovn-nbctl clear load_balancer $lb4 vips check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl clear load_balancer $lb1 vips check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep "ls_out_pre_lb.*priority=100" | grep reg0 | sort], [0], [dnl - table=0 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) + table=1 (ls_out_pre_lb ), priority=100 , match=(ip), action=(reg0[[2]] = 1; next;) ]) check ovn-nbctl clear load_balancer $lb3 vips @@ -3044,18 +3044,10 @@ for direction in from to; do done ovn-nbctl --wait=sb sync -# TCP packets should go to conntrack for load balancing. +# TCP packets should not go to conntrack for load balancing. flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl -ct_lb_mark { - ct_lb_mark { - reg0[[6]] = 0; - reg0[[12]] = 0; - ct_lb_mark /* default (use --ct to customize) */ { - output("lsp2"); - }; - }; -}; +output("lsp2"); ]) # UDP packets still go to conntrack. @@ -3188,18 +3180,10 @@ for direction in from to; do done ovn-nbctl --wait=sb sync -# TCP packets should go to conntrack for load balancing. +# TCP packets should not go to conntrack for load balancing. flow="inport == \"lsp1\" && ${flow_eth} && ${flow_ip} && ${flow_tcp}" AT_CHECK_UNQUOTED([ovn_trace --ct new --ct new --minimal ls "${flow}"], [0], [dnl -ct_lb_mark { - ct_lb_mark { - reg0[[6]] = 0; - reg0[[12]] = 0; - ct_lb_mark /* default (use --ct to customize) */ { - output("lsp2"); - }; - }; -}; +output("lsp2"); ]) # UDP packets still go to conntrack. @@ -4015,8 +3999,8 @@ check_stateful_flows() { table=? (ls_in_pre_stateful ), priority=0 , match=(1), action=(next;) table=? (ls_in_pre_stateful ), priority=100 , match=(reg0[[0]] == 1), action=(ct_next;) table=? (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) - table=? (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; ct_lb_mark;) + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) + table=? (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.20 && tcp.dst == 80), action=(reg1 = 10.0.0.20; reg2[[0..15]] = 80; ct_lb_mark;) ]) AT_CHECK([grep "ls_in_lb" sw0flows | sort | sed 's/table=../table=??/'], [0], [dnl @@ -7650,7 +7634,7 @@ check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) table=11(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb_mark(backends=42.42.42.2);) table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) @@ -7662,7 +7646,7 @@ check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && reg0 == 66.66.66.66 && ct_label.natted == 1), action=(next;) table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && reg0 == 66.66.66.66), action=(ct_lb(backends=42.42.42.2);) - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb;) table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb;) table=11(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb(backends=42.42.42.2);) table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb;) @@ -7674,7 +7658,7 @@ check ovn-nbctl --wait=sb sync AT_CHECK([ovn-sbctl lflow-list | grep -e natted -e ct_lb], [0], [dnl table=6 (lr_in_dnat ), priority=110 , match=(ct.est && ip4 && reg0 == 66.66.66.66 && ct_mark.natted == 1), action=(next;) table=6 (lr_in_dnat ), priority=110 , match=(ct.new && ip4 && reg0 == 66.66.66.66), action=(ct_lb_mark(backends=42.42.42.2);) - table=6 (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) + table=6 (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 66.66.66.66), action=(reg1 = 66.66.66.66; ct_lb_mark;) table=6 (ls_in_pre_stateful ), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) table=11(ls_in_lb ), priority=110 , match=(ct.new && ip4.dst == 66.66.66.66), action=(reg0[[1]] = 0; ct_lb_mark(backends=42.42.42.2);) table=2 (ls_out_pre_stateful), priority=110 , match=(reg0[[2]] == 1), action=(ct_lb_mark;) diff --git a/tests/ovn.at b/tests/ovn.at index f8b8db4df..f43455f60 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -23656,7 +23656,7 @@ OVS_WAIT_FOR_OUTPUT( [ovn-sbctl dump-flows > sbflows ovn-sbctl dump-flows sw0 | grep ct_lb_mark | grep priority=120 | sed 's/table=..//'], 0, [dnl - (ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) + (ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) (ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg0[[1]] = 0; ct_lb_mark(backends=10.0.0.3:80,20.0.0.3:80; hash_fields="ip_dst,ip_src,tcp_dst,tcp_src");) ]) @@ -23699,7 +23699,7 @@ ovn-sbctl dump-flows sw0 > sbflows3 AT_CHECK( [grep "ip4.dst == 10.0.0.10 && tcp.dst == 80" sbflows3 | grep priority=120 |\ sed 's/table=../table=??/'], [0], [dnl - table=??(ls_in_pre_stateful ), priority=120 , match=(ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) + table=??(ls_in_pre_stateful ), priority=120 , match=(reg0[[2]] == 1 && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(reg1 = 10.0.0.10; reg2[[0..15]] = 80; ct_lb_mark;) table=??(ls_in_lb ), priority=120 , match=(ct.new && ip4.dst == 10.0.0.10 && tcp.dst == 80), action=(drop;) ]) -- 2.17.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev