Commit [1] added handling for E/W ICMPv4/v6 "fragmentation needed" packets generated for overlay tunneled traffic. This was required because kernel generates ICMP "need frag" packet when a tunneled packet exceeds the path MTU, and such packets were previously dropped due to metadata needing to be swapped.
However, it did not cover the case where similar ICMP packets arrive from RAMP tunnels. Such packets are not subject to the same metadata handling constraints, since VXLAN encapsulation in this case does not encode port in the VNI field, and, also, packets are delivered directly to the destination VIF MAC address. As a result, they do not match the added for such packets rules and are dropped. Exclude packets coming from VTEP tunnels from this special handling. [1] https://github.com/ovn-org/ovn/commit/221476a01f2670cf4eb78cd9353e709cb8a16329 Fixes: 221476a01f26 ("ovn: Add tunnel PMTUD support.") Signed-off-by: Alexandra Rukomoinikova <[email protected]> --- v2 --> v3: removed ACK since i changed code added helper function for all get is-vtep calls changed naming from is_vtep -> to is_ramp v1 --> v2: added ACK by rename add_tunnel_ingress_icmp_need_frag_flow func to add_tunnel_ingress_pmtud_flows fixed Lorenzo's comments --- controller/encaps.c | 29 +++++++++----- controller/encaps.h | 7 ++++ controller/local_data.c | 1 + controller/local_data.h | 1 + controller/physical.c | 77 ++++++++++++++++++++++-------------- tests/ovn-controller-vtep.at | 4 ++ 6 files changed, 80 insertions(+), 39 deletions(-) diff --git a/controller/encaps.c b/controller/encaps.c index 61f41bf3a..919eea432 100644 --- a/controller/encaps.c +++ b/controller/encaps.c @@ -25,7 +25,6 @@ #include "lib/ovn-sb-idl.h" #include "lib/ovsdb-idl.h" #include "ovn-controller.h" -#include "smap.h" VLOG_DEFINE_THIS_MODULE(encaps); @@ -44,6 +43,7 @@ encaps_register_ovs_idl(struct ovsdb_idl *ovs_idl) ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_name); ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_type); ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_options); + ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_other_config); } /* Enough context to create a new tunnel, using tunnel_add(). */ @@ -201,12 +201,14 @@ out: } static void -tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg, - const char *new_chassis_id, const struct sbrec_encap *encap, - const char *local_ip, +tunnel_add(struct tunnel_ctx *tc, + const struct sbrec_sb_global *sbg, + const struct sbrec_chassis *chassis_rec, + const struct sbrec_encap *encap, const char *local_ip, const struct ovsrec_open_vswitch_table *ovs_table) { struct smap options = SMAP_INITIALIZER(&options); + struct smap other_config = SMAP_INITIALIZER(&other_config); smap_add(&options, "remote_ip", encap->ip); smap_add(&options, "local_ip", local_ip); smap_add(&options, "key", "flow"); @@ -221,9 +223,9 @@ tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg, * combination of the chassis_name and the remote and local encap-ips to * identify a specific tunnel to the remote chassis. */ - tunnel_entry_id = encaps_tunnel_id_create(new_chassis_id, encap->ip, + tunnel_entry_id = encaps_tunnel_id_create(chassis_rec->name, encap->ip, local_ip); - tunnel_entry_id_old = encaps_tunnel_id_create_legacy(new_chassis_id, + tunnel_entry_id_old = encaps_tunnel_id_create_legacy(chassis_rec->name, encap->ip); if (csum && (!strcmp(csum, "true") || !strcmp(csum, "false"))) { smap_add(&options, "csum", csum); @@ -258,7 +260,7 @@ tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg, /* Add auth info if ipsec is enabled. */ if (sbg->ipsec) { - smap_add(&options, "remote_name", new_chassis_id); + smap_add(&options, "remote_name", chassis_rec->name); /* Force NAT-T traversal via configuration */ /* Two ipsec backends are supported: libreswan and strongswan */ @@ -276,6 +278,11 @@ tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg, } } + if (is_ramp_tunnel(&chassis_rec->other_config)) { + /* Propagate ramp switch flag from chassis to interface */ + smap_add(&other_config, "is-vtep", "true"); + } + /* If there's an existing tunnel record that does not need any change, * keep it. Otherwise, create a new record (if there was an existing * record, the new record will supplant it and encaps_run() will delete @@ -312,10 +319,10 @@ tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg, * its name, otherwise generate a new, unique name. */ char *port_name = (tunnel ? xstrdup(tunnel->port->name) - : tunnel_create_name(tc, new_chassis_id)); + : tunnel_create_name(tc, chassis_rec->name)); if (!port_name) { VLOG_WARN("Unable to allocate unique name for '%s' tunnel", - new_chassis_id); + chassis_rec->name); goto exit; } @@ -323,6 +330,7 @@ tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg, ovsrec_interface_set_name(iface, port_name); ovsrec_interface_set_type(iface, encap->type); ovsrec_interface_set_options(iface, &options); + ovsrec_interface_set_other_config(iface, &other_config); struct ovsrec_port *port = ovsrec_port_insert(tc->ovs_txn); ovsrec_port_set_name(port, port_name); @@ -338,6 +346,7 @@ exit: free(tunnel_entry_id); free(tunnel_entry_id_old); smap_destroy(&options); + smap_destroy(&other_config); } static bool @@ -403,7 +412,7 @@ chassis_tunnel_add(const struct sbrec_chassis *chassis_rec, } VLOG_DBG("tunnel_add: '%s', local ip: %s", chassis_rec->name, this_chassis->encaps[j]->ip); - tunnel_add(tc, sbg, chassis_rec->name, chassis_rec->encaps[i], + tunnel_add(tc, sbg, chassis_rec, chassis_rec->encaps[i], this_chassis->encaps[j]->ip, ovs_table); tuncnt++; } diff --git a/controller/encaps.h b/controller/encaps.h index fa5dc17e5..0257d08c1 100644 --- a/controller/encaps.h +++ b/controller/encaps.h @@ -17,6 +17,7 @@ #define OVN_ENCAPS_H 1 #include <stdbool.h> +#include "smap.h" /* * Given there could be multiple tunnels with different IPs to the same @@ -68,4 +69,10 @@ bool encaps_tunnel_id_match(const char *tunnel_id, const char *chassis_id, void encaps_destroy(void); +static inline bool +is_ramp_tunnel(const struct smap *other_config) +{ + return smap_get_bool(other_config, "is-vtep", false); +} + #endif /* controller/encaps.h */ diff --git a/controller/local_data.c b/controller/local_data.c index dda746d73..af6c75b40 100644 --- a/controller/local_data.c +++ b/controller/local_data.c @@ -532,6 +532,7 @@ local_nonvif_data_run(const struct ovsrec_bridge *br_int, tun->ofport = u16_to_ofp(ofport); tun->type = tunnel_type; tun->is_ipv6 = ip ? addr_is_ipv6(ip) : false; + tun->is_ramp_tunnel = is_ramp_tunnel(&iface_rec->other_config); free(hash_id); free(ip); diff --git a/controller/local_data.h b/controller/local_data.h index 948c1a935..cbb8899eb 100644 --- a/controller/local_data.h +++ b/controller/local_data.h @@ -146,6 +146,7 @@ struct chassis_tunnel { ofp_port_t ofport; enum chassis_tunnel_type type; bool is_ipv6; + bool is_ramp_tunnel; }; /* Flow-based tunnel that consolidates multiple endpoints into a single diff --git a/controller/physical.c b/controller/physical.c index 228f3d171..30ebeb1b6 100644 --- a/controller/physical.c +++ b/controller/physical.c @@ -351,30 +351,35 @@ put_flow_based_remote_port_redirect_overlay( } } +/* Add handling for E/W ICMPv4/v6 packets when tunneled packets exceed + * path MTU. + * If packet needs to be tunneled to another node and the physical + * interface used for tunneling has a lower MTU than the packet size, + * or if there is a route exception with a smaller MTU, kernel + * generates an ICMP "Fragmentation Needed" message, but packet + * metadata didn't change. Such packets might have been dropped due + * to required metadata modifications for returned packet. + * + * Mark these packets with MLF_RX_FROM_TUNNEL_BIT for further + * processing. Packets received from a RAMP tunnel should be passed + * through, and errors handled via normal processing path, since + * port metadata is not carried in RAMP packets in VNI. + */ static void -add_tunnel_ingress_flows(const struct chassis_tunnel *tun, - enum mf_field_id mff_ovn_geneve, - struct ovn_desired_flow_table *flow_table, - struct ofpbuf *ofpacts) +add_tunnel_ingress_pmtud_flows(const struct chassis_tunnel *tun, + struct ofpbuf *ofpacts, + struct ovn_desired_flow_table *flow_table) { - /* Main ingress flow (priority 100) */ - struct match match = MATCH_CATCHALL_INITIALIZER; - match_set_in_port(&match, tun->ofport); - - ofpbuf_clear(ofpacts); - put_decapsulation(mff_ovn_geneve, tun, ofpacts); - put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts); + if (tun->is_ramp_tunnel) { + return; + } - ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, 0, &match, - ofpacts, hc_uuid); + struct match match = MATCH_CATCHALL_INITIALIZER; /* Set allow rx from tunnel bit */ put_load(1, MFF_LOG_FLAGS, MLF_RX_FROM_TUNNEL_BIT, 1, ofpacts); put_resubmit(OFTABLE_CT_ZONE_LOOKUP, ofpacts); - /* Add specific flows for E/W ICMPv{4,6} packets if tunnelled packets - * do not fit path MTU. */ - /* IPv4 ICMP flow (priority 120) */ match_init_catchall(&match); match_set_in_port(&match, tun->ofport); @@ -398,6 +403,26 @@ add_tunnel_ingress_flows(const struct chassis_tunnel *tun, ofpacts, hc_uuid); } +static void +add_tunnel_ingress_flows(const struct chassis_tunnel *tun, + enum mf_field_id mff_ovn_geneve, + struct ovn_desired_flow_table *flow_table, + struct ofpbuf *ofpacts) +{ + /* Main ingress flow (priority 100) */ + struct match match = MATCH_CATCHALL_INITIALIZER; + match_set_in_port(&match, tun->ofport); + + ofpbuf_clear(ofpacts); + put_decapsulation(mff_ovn_geneve, tun, ofpacts); + put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts); + + ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, 0, &match, + ofpacts, hc_uuid); + + add_tunnel_ingress_pmtud_flows(tun, ofpacts, flow_table); +} + static void put_stack(enum mf_field_id field, struct ofpact_stack *stack) { @@ -2827,12 +2852,6 @@ fanout_to_chassis_port_based(enum mf_field_id mff_ovn_geneve, } } -static bool -chassis_is_vtep(const struct sbrec_chassis *chassis) -{ - return smap_get_bool(&chassis->other_config, "is-vtep", false); -} - static void local_output_pb(int64_t tunnel_key, struct ofpbuf *ofpacts) { @@ -3011,19 +3030,19 @@ consider_mc_group(const struct physical_ctx *ctx, * otherwise multicast will reach remote ports through localnet * port. */ if (port->chassis) { - if (chassis_is_vtep(port->chassis)) { + if (is_ramp_tunnel(&port->chassis->other_config)) { sset_add(&vtep_chassis, port->chassis->name); } else { sset_add(&remote_chassis, port->chassis->name); } } for (size_t j = 0; j < port->n_additional_chassis; j++) { - if (chassis_is_vtep(port->additional_chassis[j])) { - sset_add(&vtep_chassis, - port->additional_chassis[j]->name); + struct sbrec_chassis *additional_chassis = + port->additional_chassis[j]; + if (is_ramp_tunnel(&additional_chassis->other_config)) { + sset_add(&vtep_chassis, additional_chassis->name); } else { - sset_add(&remote_chassis, - port->additional_chassis[j]->name); + sset_add(&remote_chassis, additional_chassis->name); } } } @@ -3943,7 +3962,7 @@ physical_run(struct physical_ctx *p_ctx, struct chassis_tunnel *tun; HMAP_FOR_EACH (tun, hmap_node, p_ctx->chassis_tunnels) { add_tunnel_ingress_flows(tun, p_ctx->mff_ovn_geneve, flow_table, - &ofpacts); + &ofpacts); } /* Process packets that arrive from flow-based tunnels. */ diff --git a/tests/ovn-controller-vtep.at b/tests/ovn-controller-vtep.at index 961324bd2..caf53e291 100644 --- a/tests/ovn-controller-vtep.at +++ b/tests/ovn-controller-vtep.at @@ -775,6 +775,10 @@ AT_CHECK([ovs-ofctl dump-flows br-int table=OFTABLE_PHY_TO_LOG | grep 'priority= priority=110,tun_id=0x<>,in_port=<> actions=move:NXM_NX_TUN_ID[[0..23]]->OXM_OF_METADATA[[0..23]],load:0x<>->NXM_NX_REG14[[0..14]],load:0x<>->NXM_NX_REG10[[1]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE) ]) +# Skip processing ICMP "packet too big" errors in this table if the packet came from a VTEP tunnel. +AT_CHECK([ovs-ofctl dump-flows br-int table=OFTABLE_PHY_TO_LOG | \ + grep -E 'icmp_type=3,icmp_code=4|icmp_type=2,icmp_code=0'], [1], []) + OVN_CONTROLLER_VTEP_STOP([], vtep1) OVN_CLEANUP([hv1]) AT_CLEANUP -- 2.48.1 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
