From: Shaohua Wu <[email protected]> If multiple devices send ICMP packets with the same ICMP id, the source ip of the packets changes to the same source ip address after the snat operation, and the packets are sent to the same destination ip address. In this scenario only a single conntrack entry is created for all such flows, causing packets from the other devices to be mapped to the same connection and preventing them from being delivered correctly to the peer.
ovs-appctl dpctl/dump-conntrack icmp,orig=(src=10.0.0.4,dst=10.0.0.2,id=500,type=8,code=0),reply=(src=10.0.0.2,dst=10.0.0.201,id=500,type=0,code=0) After fixing the problem: ovs-appctl dpctl/dump-conntrack icmp,orig=(src=10.0.0.3,dst=10.0.0.2,id=500,type=8,code=0),reply=(src=10.0.0.2,dst=10.0.0.201,id=501,type=0,code=0) icmp,orig=(src=10.0.0.1,dst=10.0.0.2,id=500,type=8,code=0),reply=(src=10.0.0.2,dst=10.0.0.201,id=500,type=0,code=0) icmp,orig=(src=10.0.0.7,dst=10.0.0.2,id=500,type=8,code=0),reply=(src=10.0.0.2,dst=10.0.0.201,id=505,type=0,code=0) icmp,orig=(src=10.0.0.4,dst=10.0.0.2,id=500,type=8,code=0),reply=(src=10.0.0.2,dst=10.0.0.201,id=502,type=0,code=0) icmp,orig=(src=10.0.0.5,dst=10.0.0.2,id=500,type=8,code=0),reply=(src=10.0.0.2,dst=10.0.0.201,id=503,type=0,code=0) icmp,orig=(src=10.0.0.6,dst=10.0.0.2,id=500,type=8,code=0),reply=(src=10.0.0.2,dst=10.0.0.201,id=504,type=0,code=0) While at it, refactor get_ip_proto() and packet_is_icmpv4_info_message() out of conntrack.c into lib/packets.c as public utilities. Rename get_ip_proto() to packet_get_ip_proto() for consistency with the packets module naming convention. Signed-off-by: Shaohua Wu <[email protected]> Signed-off-by: Emeel Hakim <[email protected]> --- lib/conntrack.c | 39 ++++++++--------- lib/packets.c | 74 ++++++++++++++++++++++++++++++++ lib/packets.h | 4 ++ tests/system-kmod-macros.at | 9 ++++ tests/system-traffic.at | 34 +++++++++++++++ tests/system-userspace-macros.at | 5 +++ 6 files changed, 144 insertions(+), 21 deletions(-) diff --git a/lib/conntrack.c b/lib/conntrack.c index 921f63cfe..39c1c4e90 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -776,22 +776,6 @@ write_ct_md(struct dp_packet *pkt, uint16_t zone, const struct conn *conn, } } -static uint8_t -get_ip_proto(const struct dp_packet *pkt) -{ - uint8_t ip_proto; - struct eth_header *l2 = dp_packet_eth(pkt); - if (l2->eth_type == htons(ETH_TYPE_IPV6)) { - struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt); - ip_proto = nh6->ip6_ctlun.ip6_un1.ip6_un1_nxt; - } else { - struct ip_header *l3_hdr = dp_packet_l3(pkt); - ip_proto = l3_hdr->ip_proto; - } - - return ip_proto; -} - static bool is_ftp_ctl(const enum ct_alg_ctl_type ct_alg_ctl) { @@ -806,7 +790,7 @@ get_alg_ctl_type(const struct dp_packet *pkt, const char *helper) * the external dependency. */ enum { CT_IPPORT_FTP = 21 }; enum { CT_IPPORT_TFTP = 69 }; - uint8_t ip_proto = get_ip_proto(pkt); + uint8_t ip_proto = packet_get_ip_proto(pkt); struct udp_header *uh = dp_packet_l4(pkt); struct tcp_header *th = dp_packet_l4(pkt); ovs_be16 ftp_port = htons(CT_IPPORT_FTP); @@ -864,6 +848,9 @@ pat_packet(struct dp_packet *pkt, const struct conn_key *key) packet_set_udp_port(pkt, key->dst.port, key->src.port); } else if (key->nw_proto == IPPROTO_SCTP) { packet_set_sctp_port(pkt, key->dst.port, key->src.port); + } else if (key->nw_proto == IPPROTO_ICMP + && packet_is_icmpv4_info_message(pkt)) { + packet_set_icmp_id(pkt, key->src.icmp_id); } } @@ -2537,8 +2524,8 @@ store_addr_to_key(union ct_addr *addr, struct conn_key *key, static bool nat_get_unique_l4(struct conntrack *ct, struct conn_key *rev_key, - ovs_be16 *port, uint16_t curr, uint16_t min, - uint16_t max) + ovs_be16 *port, ovs_be16 *peer_port, + uint16_t curr, uint16_t min, uint16_t max) { static const unsigned int max_attempts = 128; uint16_t range = max - min + 1; @@ -2559,6 +2546,10 @@ another_round: } *port = htons(curr); + if (peer_port) { + *peer_port = htons(curr); + } + if (!conn_lookup(ct, rev_key, time_msec(), NULL, NULL)) { return true; } @@ -2571,6 +2562,9 @@ another_round: } *port = htons(orig); + if (peer_port) { + *peer_port = htons(orig); + } return false; } @@ -2604,7 +2598,8 @@ nat_get_unique_tuple(struct conntrack *ct, struct conn *conn, struct conn_key *rev_key = &conn->key_node[CT_DIR_REV].key; bool pat_proto = fwd_key->nw_proto == IPPROTO_TCP || fwd_key->nw_proto == IPPROTO_UDP || - fwd_key->nw_proto == IPPROTO_SCTP; + fwd_key->nw_proto == IPPROTO_SCTP || + fwd_key->nw_proto == IPPROTO_ICMP; uint16_t min_dport, max_dport, curr_dport; uint16_t min_sport, max_sport, curr_sport; union ct_addr min_addr, max_addr, addr; @@ -2650,11 +2645,13 @@ nat_get_unique_tuple(struct conntrack *ct, struct conn *conn, bool found = false; if (nat_info->nat_action & NAT_ACTION_DST_PORT) { found = nat_get_unique_l4(ct, rev_key, &rev_key->src.port, - curr_dport, min_dport, max_dport); + NULL, curr_dport, min_dport, max_dport); } if (!found) { found = nat_get_unique_l4(ct, rev_key, &rev_key->dst.port, + rev_key->nw_proto == IPPROTO_ICMP + ? &rev_key->src.port : NULL, curr_sport, min_sport, max_sport); } diff --git a/lib/packets.c b/lib/packets.c index bf5b83d43..998a73afe 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -1511,6 +1511,80 @@ packet_set_icmp(struct dp_packet *packet, uint8_t type, uint8_t code) pkt_metadata_init_conn(&packet->md); } +/* Sets the ICMP id of the ICMP header contained in 'packet'. + * 'packet' must be a valid ICMP packet with its l4 offset properly + * populated. */ +void +packet_set_icmp_id(struct dp_packet *packet, ovs_be16 icmp_id) +{ + struct icmp_header *ih = dp_packet_l4(packet); + + if (!ih || dp_packet_l4_size(packet) < ICMP_HEADER_LEN) { + return; + } + + ovs_be16 orig_ic = ih->icmp_fields.echo.id; + + if (icmp_id != orig_ic) { + ih->icmp_fields.echo.id = icmp_id; + ih->icmp_csum = recalc_csum16(ih->icmp_csum, orig_ic, icmp_id); + } + + pkt_metadata_init_conn(&packet->md); +} + +uint8_t +packet_get_icmp_type(const struct dp_packet *packet) +{ + struct icmp_header *ih = dp_packet_l4(packet); + + if (!ih || dp_packet_l4_size(packet) < ICMP_HEADER_LEN) { + return 0; + } + + return ih->icmp_type; +} + +uint8_t +packet_get_ip_proto(const struct dp_packet *packet) +{ + struct eth_header *l2 = dp_packet_eth(packet); + uint8_t ip_proto; + + if (l2->eth_type == htons(ETH_TYPE_IPV6)) { + struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(packet); + ip_proto = nh6->ip6_ctlun.ip6_un1.ip6_un1_nxt; + } else { + struct ip_header *l3_hdr = dp_packet_l3(packet); + ip_proto = l3_hdr->ip_proto; + } + + return ip_proto; +} + +bool +packet_is_icmpv4_info_message(const struct dp_packet *packet) +{ + uint8_t ip_proto, icmp_type; + + ip_proto = packet_get_ip_proto(packet); + if (ip_proto != IPPROTO_ICMP) { + return false; + } + + icmp_type = packet_get_icmp_type(packet); + if (icmp_type == ICMP4_ECHO_REQUEST || + icmp_type == ICMP4_ECHO_REPLY || + icmp_type == ICMP4_TIMESTAMP || + icmp_type == ICMP4_TIMESTAMPREPLY || + icmp_type == ICMP4_INFOREQUEST || + icmp_type == ICMP4_INFOREPLY) { + return true; + } + + return false; +} + /* Sets the IGMP type to IGMP_HOST_MEMBERSHIP_QUERY and populates the * v3 query header fields in 'packet'. 'packet' must be a valid IGMPv3 * query packet with its l4 offset properly populated. diff --git a/lib/packets.h b/lib/packets.h index 61666f3ad..4d680412a 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -1651,6 +1651,10 @@ void packet_set_tcp_port(struct dp_packet *, ovs_be16 src, ovs_be16 dst); void packet_set_udp_port(struct dp_packet *, ovs_be16 src, ovs_be16 dst); void packet_set_sctp_port(struct dp_packet *, ovs_be16 src, ovs_be16 dst); void packet_set_icmp(struct dp_packet *, uint8_t type, uint8_t code); +void packet_set_icmp_id(struct dp_packet *, ovs_be16 icmp_id); +uint8_t packet_get_icmp_type(const struct dp_packet *packet); +uint8_t packet_get_ip_proto(const struct dp_packet *packet); +bool packet_is_icmpv4_info_message(const struct dp_packet *packet); void packet_set_nd(struct dp_packet *, const struct in6_addr *target, const struct eth_addr sll, const struct eth_addr tll); void packet_set_nd_ext(struct dp_packet *packet, diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at index 313cd7c38..2b42e7797 100644 --- a/tests/system-kmod-macros.at +++ b/tests/system-kmod-macros.at @@ -97,6 +97,15 @@ m4_define([CHECK_CONNTRACK_FRAG_OVERLAP], AT_SKIP_IF([:]) ]) +# CHECK_CONNTRACK_SNAT_ICMP_ID() +# +# Skip ICMP id conflict tests on kernel datapath. The fix for unique ICMP +# id allocation on SNAT is in the userspace conntrack only. +m4_define([CHECK_CONNTRACK_SNAT_ICMP_ID], +[ + AT_SKIP_IF([:]) +]) + # CHECK_CONNTRACK_NAT() # # Perform requirements checks for running conntrack NAT tests. The kernel diff --git a/tests/system-traffic.at b/tests/system-traffic.at index 8f4fdf8b1..852a268c8 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -4257,6 +4257,40 @@ tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),reply=(src= OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([conntrack - SNAT with icmp_id]) +CHECK_CONNTRACK() +CHECK_CONNTRACK_NAT() +CHECK_CONNTRACK_SNAT_ICMP_ID() +OVS_TRAFFIC_VSWITCHD_START() + +ADD_NAMESPACES(at_ns0, at_ns1) +ADD_VETH(p0, at_ns0, br0, "10.0.0.1/24") +NS_CHECK_EXEC([at_ns0], [ip link set dev p0 address 80:88:88:88:88:88]) +ADD_VETH(p1, at_ns1, br0, "10.0.0.2/24") + +dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0. +AT_DATA([flows.txt], [dnl +table=0,ct_state=-trk,ip,in_port=ovs-p0, actions=ct(table=1, nat) +table=0,ct_state=-trk,ip,in_port=ovs-p1, actions=ct(table=1, nat) +table=1,ct_state=+trk+new,ip,in_port=ovs-p0, actions=ct(commit, nat(src=10.0.0.201)),ovs-p1 +table=1,ct_state=+trk+est,ip,in_port=ovs-p0, actions=ovs-p1 +table=1,ct_state=+trk+est,ip,in_port=ovs-p1, actions=ovs-p0 +]) + +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-p0,packet=62cf3bde230262cf3bde230108004500001c00010000400166de0a0000010a0000020800f60b01f40000 actions=resubmit(,0)"]) +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-p0,packet=62cf3bde230262cf3bde230108004500001c00010000400166dc0a0000030a0000020800f60b01f40000 actions=resubmit(,0)"]) +AT_CHECK([ovs-ofctl packet-out br0 "in_port=ovs-p0,packet=62cf3bde230262cf3bde230108004500001c00010000400166db0a0000040a0000020800f60b01f40000 actions=resubmit(,0)"]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | sort], [0], [dnl +icmp,orig=(src=10.0.0.1,dst=10.0.0.2,id=500,type=8,code=0),reply=(src=10.0.0.2,dst=10.0.0.201,id=500,type=0,code=0) +icmp,orig=(src=10.0.0.3,dst=10.0.0.2,id=500,type=8,code=0),reply=(src=10.0.0.2,dst=10.0.0.201,id=501,type=0,code=0) +icmp,orig=(src=10.0.0.4,dst=10.0.0.2,id=500,type=8,code=0),reply=(src=10.0.0.2,dst=10.0.0.201,id=502,type=0,code=0) +]) +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([conntrack - multiple namespaces, internal ports]) CHECK_CONNTRACK() CHECK_CONNTRACK_LOCAL_STACK() diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at index 9961b37da..f0d9121e3 100644 --- a/tests/system-userspace-macros.at +++ b/tests/system-userspace-macros.at @@ -91,6 +91,11 @@ m4_define([CHECK_CONNTRACK_LOCAL_STACK], # The userspace datapath supports fragment overlap check. m4_define([CHECK_CONNTRACK_FRAG_OVERLAP]) +# CHECK_CONNTRACK_SNAT_ICMP_ID() +# +# The userspace conntrack supports unique ICMP id allocation on SNAT. +m4_define([CHECK_CONNTRACK_SNAT_ICMP_ID]) + # CHECK_CONNTRACK_NAT() # # Perform requirements checks for running conntrack NAT tests. The userspace -- 2.34.1 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
