Commit [1] added handling for E/W ICMPv4/v6 "fragmentation needed"
packets generated for overlay tunneled traffic. This was required
because the GENEVE kernel module generates ICMP "need frag" packets
when a tunneled packet exceeds the path MTU, and such packets were
previously dropped due to metadata needing to be swapped.
However, it did not cover the case where similar ICMP packets
arrive from VTEP tunnels. Such packets are not subject to the same
metadata handling constraints, since they are VXLAN-encapsulated and
are already delivered directly to the destination VIF MAC address.
As a result, they do not match the added for such packets rules and are
dropped. Exclude packets coming from VTEP tunnels from this special handling.
Fixes: ff54ab6cdced ("controller: Exclude ICMP frag-needed from VTEP tunnels.")
Signed-off-by: Alexandra Rukomoinikova <[email protected]>
---
controller/encaps.c | 29 +++++++++++-----
controller/local_data.c | 2 ++
controller/local_data.h | 1 +
controller/physical.c | 66 ++++++++++++++++++++++++------------
tests/ovn-controller-vtep.at | 4 +++
5 files changed, 71 insertions(+), 31 deletions(-)
diff --git a/controller/encaps.c b/controller/encaps.c
index 61f41bf3a..0f3dd5aed 100644
--- a/controller/encaps.c
+++ b/controller/encaps.c
@@ -44,6 +44,7 @@ encaps_register_ovs_idl(struct ovsdb_idl *ovs_idl)
ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_name);
ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_type);
ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_options);
+ ovsdb_idl_track_add_column(ovs_idl, &ovsrec_interface_col_other_config);
}
/* Enough context to create a new tunnel, using tunnel_add(). */
@@ -201,12 +202,14 @@ out:
}
static void
-tunnel_add(struct tunnel_ctx *tc, const struct sbrec_sb_global *sbg,
- const char *new_chassis_id, const struct sbrec_encap *encap,
- const char *local_ip,
+tunnel_add(struct tunnel_ctx *tc,
+ const struct sbrec_sb_global *sbg,
+ const struct sbrec_chassis *chassis_rec,
+ const struct sbrec_encap *encap, const char *local_ip,
const struct ovsrec_open_vswitch_table *ovs_table)
{
struct smap options = SMAP_INITIALIZER(&options);
+ struct smap other_config = SMAP_INITIALIZER(&other_config);
smap_add(&options, "remote_ip", encap->ip);
smap_add(&options, "local_ip", local_ip);
smap_add(&options, "key", "flow");
@@ -214,6 +217,8 @@ tunnel_add(struct tunnel_ctx *tc, const struct
sbrec_sb_global *sbg,
const char *csum = smap_get(&encap->options, "csum");
char *tunnel_entry_id = NULL;
char *tunnel_entry_id_old = NULL;
+ bool is_vtep_tunnel = smap_get_bool(&chassis_rec->other_config,
+ "is-vtep", false);
/*
* Since a chassis may have multiple encap-ip, we can't just add the
@@ -221,9 +226,9 @@ tunnel_add(struct tunnel_ctx *tc, const struct
sbrec_sb_global *sbg,
* combination of the chassis_name and the remote and local encap-ips to
* identify a specific tunnel to the remote chassis.
*/
- tunnel_entry_id = encaps_tunnel_id_create(new_chassis_id, encap->ip,
+ tunnel_entry_id = encaps_tunnel_id_create(chassis_rec->name, encap->ip,
local_ip);
- tunnel_entry_id_old = encaps_tunnel_id_create_legacy(new_chassis_id,
+ tunnel_entry_id_old = encaps_tunnel_id_create_legacy(chassis_rec->name,
encap->ip);
if (csum && (!strcmp(csum, "true") || !strcmp(csum, "false"))) {
smap_add(&options, "csum", csum);
@@ -258,7 +263,7 @@ tunnel_add(struct tunnel_ctx *tc, const struct
sbrec_sb_global *sbg,
/* Add auth info if ipsec is enabled. */
if (sbg->ipsec) {
- smap_add(&options, "remote_name", new_chassis_id);
+ smap_add(&options, "remote_name", chassis_rec->name);
/* Force NAT-T traversal via configuration */
/* Two ipsec backends are supported: libreswan and strongswan */
@@ -276,6 +281,10 @@ tunnel_add(struct tunnel_ctx *tc, const struct
sbrec_sb_global *sbg,
}
}
+ if (is_vtep_tunnel) {
+ smap_add(&other_config, "is-vtep", "true");
+ }
+
/* If there's an existing tunnel record that does not need any change,
* keep it. Otherwise, create a new record (if there was an existing
* record, the new record will supplant it and encaps_run() will delete
@@ -312,10 +321,10 @@ tunnel_add(struct tunnel_ctx *tc, const struct
sbrec_sb_global *sbg,
* its name, otherwise generate a new, unique name. */
char *port_name = (tunnel
? xstrdup(tunnel->port->name)
- : tunnel_create_name(tc, new_chassis_id));
+ : tunnel_create_name(tc, chassis_rec->name));
if (!port_name) {
VLOG_WARN("Unable to allocate unique name for '%s' tunnel",
- new_chassis_id);
+ chassis_rec->name);
goto exit;
}
@@ -323,6 +332,7 @@ tunnel_add(struct tunnel_ctx *tc, const struct
sbrec_sb_global *sbg,
ovsrec_interface_set_name(iface, port_name);
ovsrec_interface_set_type(iface, encap->type);
ovsrec_interface_set_options(iface, &options);
+ ovsrec_interface_set_other_config(iface, &other_config);
struct ovsrec_port *port = ovsrec_port_insert(tc->ovs_txn);
ovsrec_port_set_name(port, port_name);
@@ -338,6 +348,7 @@ exit:
free(tunnel_entry_id);
free(tunnel_entry_id_old);
smap_destroy(&options);
+ smap_destroy(&other_config);
}
static bool
@@ -403,7 +414,7 @@ chassis_tunnel_add(const struct sbrec_chassis *chassis_rec,
}
VLOG_DBG("tunnel_add: '%s', local ip: %s", chassis_rec->name,
this_chassis->encaps[j]->ip);
- tunnel_add(tc, sbg, chassis_rec->name, chassis_rec->encaps[i],
+ tunnel_add(tc, sbg, chassis_rec, chassis_rec->encaps[i],
this_chassis->encaps[j]->ip, ovs_table);
tuncnt++;
}
diff --git a/controller/local_data.c b/controller/local_data.c
index dda746d73..5f1a52771 100644
--- a/controller/local_data.c
+++ b/controller/local_data.c
@@ -532,6 +532,8 @@ local_nonvif_data_run(const struct ovsrec_bridge *br_int,
tun->ofport = u16_to_ofp(ofport);
tun->type = tunnel_type;
tun->is_ipv6 = ip ? addr_is_ipv6(ip) : false;
+ tun->is_vtep_tunnel = smap_get_bool(&iface_rec->other_config,
+ "is-vtep", false);
free(hash_id);
free(ip);
diff --git a/controller/local_data.h b/controller/local_data.h
index 948c1a935..fdb81288c 100644
--- a/controller/local_data.h
+++ b/controller/local_data.h
@@ -146,6 +146,7 @@ struct chassis_tunnel {
ofp_port_t ofport;
enum chassis_tunnel_type type;
bool is_ipv6;
+ bool is_vtep_tunnel;
};
/* Flow-based tunnel that consolidates multiple endpoints into a single
diff --git a/controller/physical.c b/controller/physical.c
index 228f3d171..c1401074e 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -351,31 +351,34 @@ put_flow_based_remote_port_redirect_overlay(
}
}
+/* Add handling for E/W ICMPv4/v6 packets when tunneled packets exceed
+ * path MTU.
+ * If packet needs to be tunneled to another node and the physical
+ * interface used for tunneling has a lower MTU than the packet size,
+ * or if there is a route exception with a smaller MTU, kernel
+ * generates an ICMP "Fragmentation Needed" message, but the package
+ * metadata didn't change. Such packets might have been dropped due
+ * to required metadata modifications for returned packet.
+ *
+ * Mark these packets with MLF_RX_FROM_TUNNEL_BIT for further
+ * processing. Packets received from a VTEP tunnel should be passed
+ * through, and errors handled via the normal processing path, since
+ * port metadata is not carried in VTEP packets in VNI.
+ */
static void
-add_tunnel_ingress_flows(const struct chassis_tunnel *tun,
- enum mf_field_id mff_ovn_geneve,
- struct ovn_desired_flow_table *flow_table,
- struct ofpbuf *ofpacts)
+add_tunnel_ingress_icmp_need_frag_flows(const struct chassis_tunnel *tun,
+ struct ofpbuf *ofpacts,
+ struct ovn_desired_flow_table *table)
{
- /* Main ingress flow (priority 100) */
- struct match match = MATCH_CATCHALL_INITIALIZER;
- match_set_in_port(&match, tun->ofport);
-
- ofpbuf_clear(ofpacts);
- put_decapsulation(mff_ovn_geneve, tun, ofpacts);
- put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts);
+ if (tun->is_vtep_tunnel) {
+ return;
+ }
- ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, 0, &match,
- ofpacts, hc_uuid);
+ struct match match = MATCH_CATCHALL_INITIALIZER;
- /* Set allow rx from tunnel bit */
put_load(1, MFF_LOG_FLAGS, MLF_RX_FROM_TUNNEL_BIT, 1, ofpacts);
put_resubmit(OFTABLE_CT_ZONE_LOOKUP, ofpacts);
- /* Add specific flows for E/W ICMPv{4,6} packets if tunnelled packets
- * do not fit path MTU. */
-
- /* IPv4 ICMP flow (priority 120) */
match_init_catchall(&match);
match_set_in_port(&match, tun->ofport);
match_set_dl_type(&match, htons(ETH_TYPE_IP));
@@ -383,10 +386,9 @@ add_tunnel_ingress_flows(const struct chassis_tunnel *tun,
match_set_icmp_type(&match, 3);
match_set_icmp_code(&match, 4);
- ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120, 0, &match,
+ ofctrl_add_flow(table, OFTABLE_PHY_TO_LOG, 120, 0, &match,
ofpacts, hc_uuid);
- /* IPv6 ICMP flow (priority 120) */
match_init_catchall(&match);
match_set_in_port(&match, tun->ofport);
match_set_dl_type(&match, htons(ETH_TYPE_IPV6));
@@ -394,10 +396,30 @@ add_tunnel_ingress_flows(const struct chassis_tunnel *tun,
match_set_icmp_type(&match, 2);
match_set_icmp_code(&match, 0);
- ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120, 0, &match,
+ ofctrl_add_flow(table, OFTABLE_PHY_TO_LOG, 120, 0, &match,
ofpacts, hc_uuid);
}
+static void
+add_tunnel_ingress_flows(const struct chassis_tunnel *tun,
+ enum mf_field_id mff_ovn_geneve,
+ struct ovn_desired_flow_table *flow_table,
+ struct ofpbuf *ofpacts)
+{
+ /* Main ingress flow (priority 100) */
+ struct match match = MATCH_CATCHALL_INITIALIZER;
+ match_set_in_port(&match, tun->ofport);
+
+ ofpbuf_clear(ofpacts);
+ put_decapsulation(mff_ovn_geneve, tun, ofpacts);
+ put_resubmit(OFTABLE_LOCAL_OUTPUT, ofpacts);
+
+ ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, 0, &match,
+ ofpacts, hc_uuid);
+
+ add_tunnel_ingress_icmp_need_frag_flows(tun, ofpacts, flow_table);
+}
+
static void
put_stack(enum mf_field_id field, struct ofpact_stack *stack)
{
@@ -3943,7 +3965,7 @@ physical_run(struct physical_ctx *p_ctx,
struct chassis_tunnel *tun;
HMAP_FOR_EACH (tun, hmap_node, p_ctx->chassis_tunnels) {
add_tunnel_ingress_flows(tun, p_ctx->mff_ovn_geneve, flow_table,
- &ofpacts);
+ &ofpacts);
}
/* Process packets that arrive from flow-based tunnels. */
diff --git a/tests/ovn-controller-vtep.at b/tests/ovn-controller-vtep.at
index 961324bd2..caf53e291 100644
--- a/tests/ovn-controller-vtep.at
+++ b/tests/ovn-controller-vtep.at
@@ -775,6 +775,10 @@ AT_CHECK([ovs-ofctl dump-flows br-int
table=OFTABLE_PHY_TO_LOG | grep 'priority=
priority=110,tun_id=0x<>,in_port=<>
actions=move:NXM_NX_TUN_ID[[0..23]]->OXM_OF_METADATA[[0..23]],load:0x<>->NXM_NX_REG14[[0..14]],load:0x<>->NXM_NX_REG10[[1]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE)
])
+# Skip processing ICMP "packet too big" errors in this table if the packet
came from a VTEP tunnel.
+AT_CHECK([ovs-ofctl dump-flows br-int table=OFTABLE_PHY_TO_LOG | \
+ grep -E 'icmp_type=3,icmp_code=4|icmp_type=2,icmp_code=0'], [1], [])
+
OVN_CONTROLLER_VTEP_STOP([], vtep1)
OVN_CLEANUP([hv1])
AT_CLEANUP
--
2.48.1
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev