Add missing icmp{4,6}_error flows for ingress traffic destinated to FIP associated to the interface
Related bz: https://bugzilla.redhat.com/show_bug.cgi?id=2018179 Tested-by: Eran Kuris <eku...@redhat.com> Signed-off-by: Lorenzo Bianconi <lorenzo.bianc...@redhat.com> --- Changes since v1: - add missing northd documentation --- northd/northd.c | 87 ++++++++++++++++++++++++++++++++++++++--- northd/ovn-northd.8.xml | 55 +++++++++++++++++++++++++- tests/ovn.at | 58 ++++++++++++++++++--------- 3 files changed, 174 insertions(+), 26 deletions(-) diff --git a/northd/northd.c b/northd/northd.c index 59286034d..ee4d4df26 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -12371,11 +12371,82 @@ build_lrouter_out_snat_flow(struct hmap *lflows, struct ovn_datapath *od, } } +static void +build_lrouter_ingress_nat_check_pkt_len(struct hmap *lflows, + const struct nbrec_nat *nat, + struct ovn_datapath *od, bool is_v6, + struct ds *match, struct ds *actions, + int mtu, struct shash *meter_groups) +{ + ds_clear(match); + ds_put_format(match, "inport == %s && "REGBIT_PKT_LARGER + " && "REGBIT_EGRESS_LOOPBACK" == 0", + od->l3dgw_ports[0]->json_key); + + ds_clear(actions); + if (!is_v6) { + ds_put_format(match, " && ip4 && ip4.dst == %s", nat->external_ip); + /* Set icmp4.frag_mtu to gw_mtu */ + ds_put_format(actions, + "icmp4_error {" + REGBIT_EGRESS_LOOPBACK" = 1; " + REGBIT_PKT_LARGER" = 0; " + "eth.dst = eth.src; " + "eth.src = %s; " + "ip4.dst = ip4.src; " + "ip4.src = %s; " + "ip.ttl = 254; " + "icmp4.type = 3; /* Destination Unreachable. */ " + "icmp4.code = 4; /* Frag Needed and DF was Set. */ " + "icmp4.frag_mtu = %d; " + "outport = %s; flags.loopback = 1; output; };", + nat->external_mac, + nat->external_ip, + mtu, od->l3dgw_ports[0]->json_key); + ovn_lflow_add_with_hint__(lflows, od, S_ROUTER_IN_IP_INPUT, 160, + ds_cstr(match), ds_cstr(actions), + NULL, + copp_meter_get( + COPP_ICMP4_ERR, + od->nbr->copp, + meter_groups), + &nat->header_); + } else { + ds_put_format(match, " && ip6 && ip6.dst == %s", nat->external_ip); + /* Set icmp6.frag_mtu to gw_mtu */ + ds_put_format(actions, + "icmp6_error {" + REGBIT_EGRESS_LOOPBACK" = 1; " + REGBIT_PKT_LARGER" = 0; " + "eth.dst = eth.src; " + "eth.src = %s; " + "ip6.dst = ip6.src; " + "ip6.src = %s; " + "ip.ttl = 254; " + "icmp6.type = 2; /* Packet Too Big. */ " + "icmp6.code = 0; " + "icmp6.frag_mtu = %d; " + "outport = %s; flags.loopback = 1; output; };", + nat->external_mac, + nat->external_ip, + mtu, od->l3dgw_ports[0]->json_key); + ovn_lflow_add_with_hint__(lflows, od, S_ROUTER_IN_IP_INPUT, 160, + ds_cstr(match), ds_cstr(actions), + NULL, + copp_meter_get( + COPP_ICMP6_ERR, + od->nbr->copp, + meter_groups), + &nat->header_); + } +} + static void build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od, const struct nbrec_nat *nat, struct ds *match, struct ds *actions, struct eth_addr mac, - bool distributed, bool is_v6) + bool distributed, bool is_v6, + struct shash *meter_groups) { if (od->n_l3dgw_ports && !strcmp(nat->type, "snat")) { ds_clear(match); @@ -12399,7 +12470,8 @@ build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od, */ ds_clear(actions); - build_check_pkt_len_action_string(od->l3dgw_ports[0], actions); + int gw_mtu = build_check_pkt_len_action_string(od->l3dgw_ports[0], + actions); ds_put_format(actions, REG_INPORT_ETH_ADDR " = %s; next;", od->l3dgw_ports[0]->lrp_networks.ea_s); @@ -12413,6 +12485,11 @@ build_lrouter_ingress_flow(struct hmap *lflows, struct ovn_datapath *od, ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_ADMISSION, 50, ds_cstr(match), ds_cstr(actions), &nat->header_); + if (gw_mtu) { + build_lrouter_ingress_nat_check_pkt_len(lflows, nat, od, is_v6, + match, actions, gw_mtu, + meter_groups); + } } } @@ -12510,7 +12587,7 @@ lrouter_check_nat_entry(struct ovn_datapath *od, const struct nbrec_nat *nat, static void build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, struct hmap *lflows, struct hmap *ports, struct ds *match, - struct ds *actions) + struct ds *actions, struct shash *meter_groups) { if (!od->nbr) { return; @@ -12618,7 +12695,7 @@ build_lrouter_nat_defrag_and_lb(struct ovn_datapath *od, struct hmap *lflows, /* S_ROUTER_IN_ADMISSION - S_ROUTER_IN_IP_INPUT */ build_lrouter_ingress_flow(lflows, od, nat, match, actions, - mac, distributed, is_v6); + mac, distributed, is_v6, meter_groups); /* Ingress Gateway Redirect Table: For NAT on a distributed * router, add flows that are specific to a NAT rule. These @@ -12789,7 +12866,7 @@ build_lswitch_and_lrouter_iterate_by_od(struct ovn_datapath *od, build_misc_local_traffic_drop_flows_for_lrouter(od, lsi->lflows); build_lrouter_arp_nd_for_datapath(od, lsi->lflows, lsi->meter_groups); build_lrouter_nat_defrag_and_lb(od, lsi->lflows, lsi->ports, &lsi->match, - &lsi->actions); + &lsi->actions, lsi->meter_groups); } /* Helper function to combine all lflow generation which is iterated by port. diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index b6a5f7767..e42c70be1 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -2209,6 +2209,57 @@ next; </p> <ul> + <li> + <p> + For each <code>dnat_and_snat</code> NAT rule on a distributed + logical routers or gateway routers with gateway port + configured with <code>options:gateway_mtu</code> to a valid integer + value <var>M</var>, a priority-160 flow with the match + <code>inport == <var>LRP</var> && REGBIT_PKT_LARGER + && REGBIT_EGRESS_LOOPBACK == 0</code>, where <var>LRP</var> + is the logical router port and applies the following action for ipv4 + and ipv6 respectively: + </p> + + <pre> +icmp4_error { + icmp4.type = 3; /* Destination Unreachable. */ + icmp4.code = 4; /* Frag Needed and DF was Set. */ + icmp4.frag_mtu = <var>M</var>; + eth.dst = eth.src; + eth.src = <var>E</var>; + ip4.dst = ip4.src; + ip4.src = <var>I</var>; + ip.ttl = 255; + REGBIT_EGRESS_LOOPBACK = 1; + REGBIT_PKT_LARGER 0; + outport = <var>LRP</var>; + flags.loopback = 1; + output; +}; + +icmp6_error { + icmp6.type = 2; + icmp6.code = 0; + icmp6.frag_mtu = <var>M</var>; + eth.dst = eth.src; + eth.src = <var>E</var>; + ip6.dst = ip6.src; + ip6.src = <var>I</var>; + ip.ttl = 255; + REGBIT_EGRESS_LOOPBACK = 1; + REGBIT_PKT_LARGER 0; + outport = <var>LRP</var>; + flags.loopback = 1; + output; +}; + </pre> + <p> + where <var>E</var> and <var>I</var> are the NAT rule external mac + and IP respectively. + </p> + </li> + <li> <p> For distributed logical routers or gateway routers with gateway port @@ -2221,7 +2272,7 @@ next; </p> <pre> -icmp4 { +icmp4_error { icmp4.type = 3; /* Destination Unreachable. */ icmp4.code = 4; /* Frag Needed and DF was Set. */ icmp4.frag_mtu = <var>M</var>; @@ -2234,7 +2285,7 @@ icmp4 { next(pipeline=ingress, table=0); }; -icmp6 { +icmp6_error { icmp6.type = 2; icmp6.code = 0; icmp6.frag_mtu = <var>M</var>; diff --git a/tests/ovn.at b/tests/ovn.at index ec4c877b2..6f6f5c2da 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -16801,6 +16801,8 @@ ovn_start ovn-nbctl ls-add sw0 ovn-nbctl lsp-add sw0 sw0-port1 ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:01 10.0.0.3 1000::3" +ovn-nbctl lsp-add sw0 sw0-port2 +ovn-nbctl lsp-set-addresses sw0-port2 "50:54:00:00:00:02 10.0.0.4 1000::4" ovn-nbctl lr-add lr0 ovn-nbctl lrp-add lr0 lr0-sw0 00:00:00:00:ff:01 10.0.0.1/24 1000::1/64 @@ -16824,7 +16826,9 @@ ovn-nbctl lsp-set-options ln-public network_name=phys ovn-nbctl lrp-set-gateway-chassis lr0-public hv1 20 ovn-nbctl lr-nat-add lr0 snat 172.168.0.100 10.0.0.0/24 +ovn-nbctl lr-nat-add lr0 dnat_and_snat 172.168.0.110 10.0.0.4 sw0-port2 f0:00:00:01:02:04 ovn-nbctl lr-nat-add lr0 snat 2000::1 1000::/64 +ovn-nbctl lr-nat-add lr0 dnat_and_snat 2000::2 1000::4 sw0-port2 f0:00:00:01:02:04 net_add n1 @@ -16838,6 +16842,11 @@ ovs-vsctl -- add-port br-int hv1-vif1 -- \ options:tx_pcap=hv1/vif1-tx.pcap \ options:rxq_pcap=hv1/vif1-rx.pcap \ ofport-request=1 +ovs-vsctl -- add-port br-int hv1-vif2 -- \ + set interface hv1-vif2 external-ids:iface-id=sw0-port2 \ + options:tx_pcap=hv1/vif2-tx.pcap \ + options:rxq_pcap=hv1/vif2-rx.pcap \ + ofport-request=1 reset_pcap_file() { local iface=$1 @@ -16936,16 +16945,19 @@ test_ip_packet_larger() { # IPv4 ingress traffic generated outside the cluster test_ip_packet_larger_ext() { - local mtu=$1 + local idx=$1 + local checksum=$4 + local mtu=$5 + local reply_checksum=$6 # Send ip packet from sw0-port1 to outside src_mac="00000012af11" # external mac - dst_mac="000020201213" # lr0-public mac + dst_mac="$2" # lr0-public mac src_ip=`ip_to_hex 172 168 0 4` - dst_ip=`ip_to_hex 172 168 0 100` + dst_ip="$3" # Set the packet length to 118. pkt_len=0076 - packet=${dst_mac}${src_mac}08004500${pkt_len}00000000400120cf + packet=${dst_mac}${src_mac}08004500${pkt_len}000000004001${checksum} orig_packet_l3=${src_ip}${dst_ip}0900000000000000 orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000 orig_packet_l3=${orig_packet_l3}000000000000000000000000000000000000 @@ -16959,19 +16971,19 @@ test_ip_packet_larger_ext() { # optional_gw_ip_garp=ffffffffffff00002020121308060001080006040001000020201213aca80064000000000000aca80064 ext_ip_garp=ffffffffffff00000012af110806000108000604000100000012af11aca80004000000000000aca80004 - src_ip=`ip_to_hex 172 168 0 100` + src_ip="$3" dst_ip=`ip_to_hex 172 168 0 4` # pkt len should be 146 (28 (icmp packet) + 118 (orig ip + payload)) reply_pkt_len=0092 ip_csum=f397 - icmp_reply=${src_mac}${dst_mac}08004500${reply_pkt_len}00004000fe0122b2 + icmp_reply=${src_mac}${dst_mac}08004500${reply_pkt_len}00004000fe01${reply_checksum} icmp_reply=${icmp_reply}${src_ip}${dst_ip}0304${ip_csum}0000$(printf "%04x" $mtu) - icmp_reply=${icmp_reply}4500${pkt_len}00000000400120cf + icmp_reply=${icmp_reply}4500${pkt_len}000000004001${checksum} icmp_reply=${icmp_reply}${orig_packet_l3} echo $icmp_reply > br-phys_n1.expected as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1 - as hv1 reset_pcap_file hv1-vif1 hv1/vif1 + as hv1 reset_pcap_file hv1-vif${idx} hv1/vif${idx} check as hv1 ovs-appctl netdev-dummy/receive br-phys_n1 $ext_ip_garp sleep 1 @@ -17038,13 +17050,15 @@ test_ip6_packet_larger() { # IPv6 ingress traffic generated outside the cluster test_ip6_packet_larger_ext() { - local mtu=$1 + local idx=$1 + local mtu=$4 + local icmp_checksum=$5 local eth_src=00000012af11 - local eth_dst=000020201213 + local eth_dst=$2 local ipv6_src=20000000000000000000000000000004 - local ipv6_dst=20000000000000000000000000000001 + local ipv6_dst=$3 local payload=0000000000000000000000000000000000000000 local payload=${payload}0000000000000000000000000000000000000000 @@ -17052,11 +17066,11 @@ test_ip6_packet_larger_ext() { local payload=${payload}0000000000000000000000000000000000000000 local ip6_hdr=6000000000583afe${ipv6_src}${ipv6_dst} - local packet=${eth_dst}${eth_src}86dd${ip6_hdr}9000cc7662f00001${payload} + local packet=${eth_dst}${eth_src}86dd${ip6_hdr}9000${icmp_checksum}62f00001${payload} # Some ** ARP ** packets might still be received - ignore them as hv1 reset_pcap_file br-phys_n1 hv1/br-phys_n1 - as hv1 reset_pcap_file hv1-vif1 hv1/vif1 + as hv1 reset_pcap_file hv1-vif${idx} hv1/vif${idx} local na_ip6_hdr=6000000000203aff${ipv6_src}${ipv6_dst} local na=${eth_dst}${eth_src}86dd${na_ip6_hdr}8800d78440000000${ipv6_src}0201${eth_src} @@ -17114,7 +17128,7 @@ for mtu in 100 500 118; do OVS_WAIT_FOR_OUTPUT([ as hv1 ovs-ofctl dump-flows br-int > br-int-flows-$mtu AT_CAPTURE_FILE([br-int-flows-$mtu]) - grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [3 + grep "check_pkt_larger($(expr $mtu + 18))" br-int-flows-$mtu | wc -l], [0], [4 ]) AS_BOX([testing outgoing traffic mtu $mtu - IPv4]) @@ -17132,14 +17146,20 @@ AT_CAPTURE_FILE([ext-sbflows-100]) OVS_WAIT_FOR_OUTPUT([ as hv1 ovs-ofctl dump-flows br-int > ext-br-int-flows-100 AT_CAPTURE_FILE([ext-br-int-flows-100]) - grep "check_pkt_larger(118)" ext-br-int-flows-100 | wc -l], [0], [3 + grep "check_pkt_larger(118)" ext-br-int-flows-100 | wc -l], [0], [4 ]) AS_BOX([testing ingress traffic mtu 100 - IPv4]) -test_ip_packet_larger_ext 100 +test_ip_packet_larger_ext 1 000020201213 $(ip_to_hex 172 168 0 100) 20cf 100 22b2 + +AS_BOX([testing ingress traffic mtu 100 - IPv4 FIP]) +test_ip_packet_larger_ext 2 f00000010204 $(ip_to_hex 172 168 0 110) 20c5 100 22a8 AS_BOX([testing ingress traffic mtu 100 - IPv6]) -test_ip6_packet_larger_ext 100 +test_ip6_packet_larger_ext 1 000020201213 20000000000000000000000000000001 100 cc76 + +AS_BOX([testing ingress traffic mtu 100 - IPv6 FIP]) +test_ip6_packet_larger_ext 2 f00000010204 20000000000000000000000000000002 100 cc75 ovn-nbctl lsp-del sw0-lr0 @@ -17200,10 +17220,10 @@ OVS_WAIT_FOR_OUTPUT([ ]) AS_BOX([testing ingress traffic mtu 100 for gw router - IPv4]) -test_ip_packet_larger_ext 100 +test_ip_packet_larger_ext 1 000020201213 $(ip_to_hex 172 168 0 100) 20cf 100 22b2 AS_BOX([testing ingress traffic mtu 100 for gw router - IPv6]) -test_ip6_packet_larger_ext 100 +test_ip6_packet_larger_ext 1 000020201213 20000000000000000000000000000001 100 cc76 OVN_CLEANUP([hv1]) AT_CLEANUP -- 2.31.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev