On 04.05.2026 15:26, Dumitru Ceara wrote: > On 4/23/26 5:04 PM, Alexandra Rukomoinikova via dev wrote: >> Commit [1] added handling for E/W ICMPv4/v6 "fragmentation needed" >> packets generated for overlay tunneled traffic. This was required >> because kernel generates ICMP "need frag" packet when a tunneled >> packet exceeds the path MTU, and such packets were previously >> dropped due to metadata needing to be swapped. >> >> However, it did not cover the case where similar ICMP packets arrive >> from RAMP tunnels. Such packets are not subject to the same metadata >> handling constraints, since VXLAN encapsulation in this case does not >> encode port in the VNI field, and, also, packets are delivered directly >> to the destination VIF MAC address. >> >> As a result, they do not match the added for such packets rules and are >> dropped. Exclude packets coming from VTEP tunnels from this special handling. >> >> [1] >> https://github.com/ovn-org/ovn/commit/221476a01f2670cf4eb78cd9353e709cb8a16329 >> Fixes: 221476a01f26 ("ovn: Add tunnel PMTUD support.") >> Signed-off-by: Alexandra Rukomoinikova <[email protected]> >> --- >> v2 --> v3: removed ACK since i changed code >> added helper function for all get is-vtep calls >> changed naming from is_vtep -> to is_ramp >> v1 --> v2: added ACK by >> rename add_tunnel_ingress_icmp_need_frag_flow func to >> add_tunnel_ingress_pmtud_flows >> fixed Lorenzo's comments >> --- > Hi Alexandra, > > Thanks for this new revision! > >> controller/encaps.c | 29 +++++++++----- >> controller/encaps.h | 7 ++++ >> controller/local_data.c | 1 + >> controller/local_data.h | 1 + >> controller/physical.c | 77 ++++++++++++++++++++++-------------- >> tests/ovn-controller-vtep.at | 4 ++ >> 6 files changed, 80 insertions(+), 39 deletions(-) >> >> diff --git a/controller/encaps.c b/controller/encaps.c >> index 61f41bf3a..919eea432 100644 >> --- a/controller/encaps.c >> +++ b/controller/encaps.c >> @@ -25,7 +25,6 @@ >> #include "lib/ovn-sb-idl.h" >> #include "lib/ovsdb-idl.h" >> #include "ovn-controller.h" >> -#include "smap.h" >> >> VLOG_DEFINE_THIS_MODULE(encaps); >> >> @@ -44,6 +43,7 @@ encaps_register_ovs_idl(struct ovsdb_idl *ovs_idl) >> ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_name); >> ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_type); >> ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_options); >> + ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_other_config); >> } >> >> /* Enough context to create a new tunnel, using tunnel_add(). */ >> @@ -201,12 +201,14 @@ out: >> } >> >> static void >> -tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg, >> - const char *new_chassis_id, const struct sbrec_encap *encap, >> - const char *local_ip, >> +tunnel_add(struct tunnel_ctx *tc, >> + const struct sbrec_sb_global *sbg, >> + const struct sbrec_chassis *chassis_rec, >> + const struct sbrec_encap *encap, const char *local_ip, >> const struct ovsrec_open_vswitch_table *ovs_table) >> { >> struct smap options = SMAP_INITIALIZER(&options); >> + struct smap other_config = SMAP_INITIALIZER(&other_config); >> smap_add(&options, "remote_ip", encap->ip); >> smap_add(&options, "local_ip", local_ip); >> smap_add(&options, "key", "flow"); >> @@ -221,9 +223,9 @@ tunnel_add(struct tunnel_ctx *tc, const struct >> sbrec_sb_global *sbg, >> * combination of the chassis_name and the remote and local encap-ips >> to >> * identify a specific tunnel to the remote chassis. >> */ >> - tunnel_entry_id = encaps_tunnel_id_create(new_chassis_id, encap->ip, >> + tunnel_entry_id = encaps_tunnel_id_create(chassis_rec->name, encap->ip, >> local_ip); >> - tunnel_entry_id_old = encaps_tunnel_id_create_legacy(new_chassis_id, >> + tunnel_entry_id_old = encaps_tunnel_id_create_legacy(chassis_rec->name, >> encap->ip); >> if (csum && (!strcmp(csum, "true") || !strcmp(csum, "false"))) { >> smap_add(&options, "csum", csum); >> @@ -258,7 +260,7 @@ tunnel_add(struct tunnel_ctx *tc, const struct >> sbrec_sb_global *sbg, >> >> /* Add auth info if ipsec is enabled. */ >> if (sbg->ipsec) { >> - smap_add(&options, "remote_name", new_chassis_id); >> + smap_add(&options, "remote_name", chassis_rec->name); >> >> /* Force NAT-T traversal via configuration */ >> /* Two ipsec backends are supported: libreswan and strongswan */ >> @@ -276,6 +278,11 @@ tunnel_add(struct tunnel_ctx *tc, const struct >> sbrec_sb_global *sbg, >> } >> } >> >> + if (is_ramp_tunnel(&chassis_rec->other_config)) { >> + /* Propagate ramp switch flag from chassis to interface */ > Nit: one space too many, plus missing period at end of sentence. > >> + smap_add(&other_config, "is-vtep", "true"); >> + } >> + >> /* If there's an existing tunnel record that does not need any change, >> * keep it. Otherwise, create a new record (if there was an existing >> * record, the new record will supplant it and encaps_run() will delete >> @@ -312,10 +319,10 @@ tunnel_add(struct tunnel_ctx *tc, const struct >> sbrec_sb_global *sbg, >> * its name, otherwise generate a new, unique name. */ >> char *port_name = (tunnel >> ? xstrdup(tunnel->port->name) >> - : tunnel_create_name(tc, new_chassis_id)); >> + : tunnel_create_name(tc, chassis_rec->name)); >> if (!port_name) { >> VLOG_WARN("Unable to allocate unique name for '%s' tunnel", >> - new_chassis_id); >> + chassis_rec->name); >> goto exit; >> } >> >> @@ -323,6 +330,7 @@ tunnel_add(struct tunnel_ctx *tc, const struct >> sbrec_sb_global *sbg, >> ovsrec_interface_set_name(iface, port_name); >> ovsrec_interface_set_type(iface, encap->type); >> ovsrec_interface_set_options(iface, &options); >> + ovsrec_interface_set_other_config(iface, &other_config); >> >> struct ovsrec_port *port = ovsrec_port_insert(tc->ovs_txn); >> ovsrec_port_set_name(port, port_name); >> @@ -338,6 +346,7 @@ exit: >> free(tunnel_entry_id); >> free(tunnel_entry_id_old); >> smap_destroy(&options); >> + smap_destroy(&other_config); >> } >> >> static bool >> @@ -403,7 +412,7 @@ chassis_tunnel_add(const struct sbrec_chassis >> *chassis_rec, >> } >> VLOG_DBG("tunnel_add: '%s', local ip: %s", chassis_rec->name, >> this_chassis->encaps[j]->ip); >> - tunnel_add(tc, sbg, chassis_rec->name, chassis_rec->encaps[i], >> + tunnel_add(tc, sbg, chassis_rec, chassis_rec->encaps[i], >> this_chassis->encaps[j]->ip, ovs_table); >> tuncnt++; >> } >> diff --git a/controller/encaps.h b/controller/encaps.h >> index fa5dc17e5..0257d08c1 100644 >> --- a/controller/encaps.h >> +++ b/controller/encaps.h >> @@ -17,6 +17,7 @@ >> #define OVN_ENCAPS_H 1 >> >> #include <stdbool.h> >> +#include "smap.h" >> >> /* >> * Given there could be multiple tunnels with different IPs to the same >> @@ -68,4 +69,10 @@ bool encaps_tunnel_id_match(const char *tunnel_id, const >> char *chassis_id, >> >> void encaps_destroy(void); >> >> +static inline bool >> +is_ramp_tunnel(const struct smap *other_config) >> +{ >> + return smap_get_bool(other_config, "is-vtep", false); >> +} >> + >> #endif /* controller/encaps.h */ >> diff --git a/controller/local_data.c b/controller/local_data.c >> index dda746d73..af6c75b40 100644 >> --- a/controller/local_data.c >> +++ b/controller/local_data.c >> @@ -532,6 +532,7 @@ local_nonvif_data_run(const struct ovsrec_bridge *br_int, >> tun->ofport = u16_to_ofp(ofport); >> tun->type = tunnel_type; >> tun->is_ipv6 = ip ? addr_is_ipv6(ip) : false; >> + tun->is_ramp_tunnel = >> is_ramp_tunnel(&iface_rec->other_config); >> >> free(hash_id); >> free(ip); >> diff --git a/controller/local_data.h b/controller/local_data.h >> index 948c1a935..cbb8899eb 100644 >> --- a/controller/local_data.h >> +++ b/controller/local_data.h >> @@ -146,6 +146,7 @@ struct chassis_tunnel { >> ofp_port_t ofport; >> enum chassis_tunnel_type type; >> bool is_ipv6; >> + bool is_ramp_tunnel; >> }; >> >> /* Flow-based tunnel that consolidates multiple endpoints into a single >> diff --git a/controller/physical.c b/controller/physical.c >> index 228f3d171..30ebeb1b6 100644 >> --- a/controller/physical.c >> +++ b/controller/physical.c >> @@ -351,30 +351,35 @@ put_flow_based_remote_port_redirect_overlay( >> } >> } >> >> +/* Add handling for E/W ICMPv4/v6 packets when tunneled packets exceed >> + * path MTU. >> + * If packet needs to be tunneled to another node and the physical >> + * interface used for tunneling has a lower MTU than the packet size, >> + * or if there is a route exception with a smaller MTU, kernel >> + * generates an ICMP "Fragmentation Needed" message, but packet >> + * metadata didn't change. Such packets might have been dropped due >> + * to required metadata modifications for returned packet. >> + * >> + * Mark these packets with MLF_RX_FROM_TUNNEL_BIT for further >> + * processing. Packets received from a RAMP tunnel should be passed >> + * through, and errors handled via normal processing path, since >> + * port metadata is not carried in RAMP packets in VNI. >> + */ >> static void >> -add_tunnel_ingress_flows(const struct chassis_tunnel *tun, >> - enum mf_field_id mff_ovn_geneve, >> - struct ovn_desired_flow_table *flow_table, >> - struct ofpbuf *ofpacts) >> +add_tunnel_ingress_pmtud_flows(const struct chassis_tunnel *tun, >> + struct ofpbuf *ofpacts, >> + struct ovn_desired_flow_table *flow_table) >> { >> - /* Main ingress flow (priority 100) */ >> - struct match match = MATCH_CATCHALL_INITIALIZER; >> - match_set_in_port(&match, tun->ofport); >> - >> - ofpbuf_clear(ofpacts); >> - put_decapsulation(mff_ovn_geneve, tun, ofpacts); >> - put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts); >> + if (tun->is_ramp_tunnel) { >> + return; >> + } >> >> - ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, 0, &match, >> - ofpacts, hc_uuid); >> + struct match match = MATCH_CATCHALL_INITIALIZER; >> >> /* Set allow rx from tunnel bit */ >> put_load(1, MFF_LOG_FLAGS, MLF_RX_FROM_TUNNEL_BIT, 1, ofpacts); >> put_resubmit(OFTABLE_CT_ZONE_LOOKUP, ofpacts); >> >> - /* Add specific flows for E/W ICMPv{4,6} packets if tunnelled packets >> - * do not fit path MTU. */ >> - >> /* IPv4 ICMP flow (priority 120) */ >> match_init_catchall(&match); >> match_set_in_port(&match, tun->ofport); >> @@ -398,6 +403,26 @@ add_tunnel_ingress_flows(const struct chassis_tunnel >> *tun, >> ofpacts, hc_uuid); >> } >> >> +static void >> +add_tunnel_ingress_flows(const struct chassis_tunnel *tun, >> + enum mf_field_id mff_ovn_geneve, >> + struct ovn_desired_flow_table *flow_table, >> + struct ofpbuf *ofpacts) >> +{ >> + /* Main ingress flow (priority 100) */ >> + struct match match = MATCH_CATCHALL_INITIALIZER; >> + match_set_in_port(&match, tun->ofport); >> + >> + ofpbuf_clear(ofpacts); >> + put_decapsulation(mff_ovn_geneve, tun, ofpacts); >> + put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts); >> + >> + ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, 0, &match, >> + ofpacts, hc_uuid); >> + >> + add_tunnel_ingress_pmtud_flows(tun, ofpacts, flow_table); >> +} >> + >> static void >> put_stack(enum mf_field_id field, struct ofpact_stack *stack) >> { >> @@ -2827,12 +2852,6 @@ fanout_to_chassis_port_based(enum mf_field_id >> mff_ovn_geneve, >> } >> } >> >> -static bool >> -chassis_is_vtep(const struct sbrec_chassis *chassis) >> -{ >> - return smap_get_bool(&chassis->other_config, "is-vtep", false); >> -} >> - >> static void >> local_output_pb(int64_t tunnel_key, struct ofpbuf *ofpacts) >> { >> @@ -3011,19 +3030,19 @@ consider_mc_group(const struct physical_ctx *ctx, >> * otherwise multicast will reach remote ports through localnet >> * port. */ >> if (port->chassis) { >> - if (chassis_is_vtep(port->chassis)) { >> + if (is_ramp_tunnel(&port->chassis->other_config)) { >> sset_add(&vtep_chassis, port->chassis->name); >> } else { >> sset_add(&remote_chassis, port->chassis->name); >> } >> } >> for (size_t j = 0; j < port->n_additional_chassis; j++) { >> - if (chassis_is_vtep(port->additional_chassis[j])) { >> - sset_add(&vtep_chassis, >> - port->additional_chassis[j]->name); >> + struct sbrec_chassis *additional_chassis = >> + port->additional_chassis[j]; >> + if (is_ramp_tunnel(&additional_chassis->other_config)) { >> + sset_add(&vtep_chassis, additional_chassis->name); >> } else { >> - sset_add(&remote_chassis, >> - port->additional_chassis[j]->name); >> + sset_add(&remote_chassis, additional_chassis->name); >> } >> } >> } >> @@ -3943,7 +3962,7 @@ physical_run(struct physical_ctx *p_ctx, >> struct chassis_tunnel *tun; >> HMAP_FOR_EACH (tun, hmap_node, p_ctx->chassis_tunnels) { >> add_tunnel_ingress_flows(tun, p_ctx->mff_ovn_geneve, flow_table, >> - &ofpacts); >> + &ofpacts); > I know we agreed in v2 to do this small indentation fix. But there's > another small one just below, the next call to add_tunnel_ingress_flows(). > >> } >> >> /* Process packets that arrive from flow-based tunnels. */ >> diff --git a/tests/ovn-controller-vtep.at b/tests/ovn-controller-vtep.at >> index 961324bd2..caf53e291 100644 >> --- a/tests/ovn-controller-vtep.at >> +++ b/tests/ovn-controller-vtep.at >> @@ -775,6 +775,10 @@ AT_CHECK([ovs-ofctl dump-flows br-int >> table=OFTABLE_PHY_TO_LOG | grep 'priority= >> priority=110,tun_id=0x<>,in_port=<> >> actions=move:NXM_NX_TUN_ID[[0..23]]->OXM_OF_METADATA[[0..23]],load:0x<>->NXM_NX_REG14[[0..14]],load:0x<>->NXM_NX_REG10[[1]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE) >> ]) >> >> +# Skip processing ICMP "packet too big" errors in this table if the packet >> came from a VTEP tunnel. >> +AT_CHECK([ovs-ofctl dump-flows br-int table=OFTABLE_PHY_TO_LOG | \ >> + grep -E 'icmp_type=3,icmp_code=4|icmp_type=2,icmp_code=0'], [1], >> []) >> + >> OVN_CONTROLLER_VTEP_STOP([], vtep1) >> OVN_CLEANUP([hv1]) >> AT_CLEANUP > With the tiny nits above addressed, I applied this to main and 26.03. > There are some conflicts on older branches (25.09 -> 24.03) so if you > need it there too, could you please post a branch specific backport patch? HI! i will do it > > Maybe for future patches, it would be good if you could update your git > config, there's currently a mismatch between: > > Author: Rukomoinikova Aleksandra <[email protected]> > > and your signoff. ok, sorry for this) > > Regards, > Dumitru >
_______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
