When a logical switch has stateful ACLs (allow-related) or load
balancers configured, all IP traffic is sent to conntrack in the
PRE_ACL and PRE_LB pipeline stages.  Traffic from/to remote VTEPs
has no conntrack zone assigned, so conntrack lookups return
ct_state=+trk+inv, causing the traffic to be dropped.

Fix this by adding priority-110 flows that bypass conntrack for
EVPN remote VTEP traffic, identified by the from_evpn_vtep and
to_evpn_vtep predicates.  These predicates check bit 31 of the
logical inport/outport registers, which is always set for EVPN
binding keys (OVN_MIN_EVPN_KEY = 1 << 31).

The EVPN skip in PRE_ACL is only added when stateful ACLs are
present (matching the existing pattern for localnet/router/switch
ports).  The EVPN skip in PRE_LB is unconditional, unlike localnet
ports which are gated on !has_lb_vip -- remote VTEPs have no
conntrack zones so conntrack would always fail regardless of LB
configuration.

Fixes: 9081afcf8698 ("controller: Create physical flows based on EVPN 
structures.")
Reported-at: https://redhat.atlassian.net/browse/FDP-3462
Suggested-by: Ales Musil <[email protected]>
Assisted-by: Claude, with model: claude-opus-4-6
Signed-off-by: Dumitru Ceara <[email protected]>
---
 lib/logical-fields.c | 22 +++++++++++
 lib/ovn-util.c       |  2 +-
 northd/northd.c      | 37 ++++++++++++++++++
 tests/multinode.at   | 30 ++++++++++++++
 tests/ovn-northd.at  | 93 ++++++++++++++++++++++++++++++++++++++++++++
 tests/ovn.at         | 16 +++++++-
 6 files changed, 197 insertions(+), 3 deletions(-)

diff --git a/lib/logical-fields.c b/lib/logical-fields.c
index 9b04762a17..4b8bcfdc6f 100644
--- a/lib/logical-fields.c
+++ b/lib/logical-fields.c
@@ -72,6 +72,28 @@ ovn_init_symtab(struct shash *symtab)
     expr_symtab_add_string(symtab, "inport", MFF_LOG_INPORT, NULL);
     expr_symtab_add_string(symtab, "outport", MFF_LOG_OUTPORT, NULL);
 
+    /* Also register the inport/outport backing registers as numeric fields
+     * so that predicates can reference specific bits (e.g., the EVPN key
+     * indicator at bit 31). */
+    char inport_reg[8], outport_reg[8];
+    snprintf(inport_reg, sizeof inport_reg, "reg%d",
+             MFF_LOG_INPORT - MFF_REG0);
+    expr_symtab_add_field(symtab, inport_reg, MFF_LOG_INPORT, NULL, false);
+    snprintf(outport_reg, sizeof outport_reg, "reg%d",
+             MFF_LOG_OUTPORT - MFF_REG0);
+    expr_symtab_add_field(symtab, outport_reg, MFF_LOG_OUTPORT, NULL, false);
+
+    /* EVPN binding keys have bit 31 set (OVN_MIN_EVPN_KEY = 1 << 31).
+     * Define predicates to identify traffic from/to remote VTEPs so that
+     * northd can skip conntrack without hard-coding register indices. */
+    char vtep_pred[64];
+    snprintf(vtep_pred, sizeof vtep_pred,
+             "%s == 0x80000000/0x80000000", inport_reg);
+    expr_symtab_add_predicate(symtab, "from_evpn_vtep", vtep_pred);
+    snprintf(vtep_pred, sizeof vtep_pred,
+             "%s == 0x80000000/0x80000000", outport_reg);
+    expr_symtab_add_predicate(symtab, "to_evpn_vtep", vtep_pred);
+
     /* The port isn't reserved along the pipeline it's just defined as symbol
      * to support matching on string and moving between string registers. */
     expr_symtab_add_string(symtab, "remote_outport",
diff --git a/lib/ovn-util.c b/lib/ovn-util.c
index 65fdb3a59c..fb02825ac4 100644
--- a/lib/ovn-util.c
+++ b/lib/ovn-util.c
@@ -1027,7 +1027,7 @@ ip_address_and_port_from_lb_key(const char *key, char 
**ip_address,
  * NOTE: If OVN_NORTHD_PIPELINE_CSUM is updated make sure to double check
  * whether an update of OVN_INTERNAL_MINOR_VER is required. */
 #define OVN_NORTHD_PIPELINE_CSUM "3760014456 11249"
-#define OVN_INTERNAL_MINOR_VER 13
+#define OVN_INTERNAL_MINOR_VER 14
 
 /* Returns the OVN version. The caller must free the returned value. */
 char *
diff --git a/northd/northd.c b/northd/northd.c
index bc817073e2..0b52db6cf6 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -6418,6 +6418,31 @@ skip_port_from_conntrack(const struct ovn_datapath *od, 
struct ovn_port *op,
     free(egress_match);
 }
 
+/* Skip conntrack for traffic from/to EVPN remote VTEPs.
+ * Remote VTEPs do not have conntrack zones assigned, so
+ * conntrack lookups would return +trk+inv and cause drops. */
+static void
+skip_evpn_from_conntrack(const struct ovn_datapath *od,
+                         bool has_stateful_acl,
+                         const struct ovn_stage *in_stage,
+                         const struct ovn_stage *out_stage, uint16_t priority,
+                         struct lflow_table *lflows,
+                         struct lflow_ref *lflow_ref)
+{
+    if (!od->has_evpn_vni) {
+        return;
+    }
+
+    const char *egress_action = has_stateful_acl
+                                ? "next;"
+                                : "flags.pkt_sampled = 0; ct_clear; next;";
+
+    ovn_lflow_add(lflows, od, in_stage, priority,
+                  "from_evpn_vtep", "next;", lflow_ref);
+    ovn_lflow_add(lflows, od, out_stage, priority,
+                  "to_evpn_vtep", egress_action, lflow_ref);
+}
+
 static void
 build_stateless_filter(const struct ovn_datapath *od,
                        const struct nbrec_acl *acl,
@@ -6520,6 +6545,10 @@ build_ls_stateful_rec_pre_acls(
                                      lflow_ref);
         }
 
+        skip_evpn_from_conntrack(od, true,
+                                 S_SWITCH_IN_PRE_ACL, S_SWITCH_OUT_PRE_ACL,
+                                 110, lflows, lflow_ref);
+
         /* stateless filters always take precedence over stateful ACLs. */
         build_stateless_filters(od, ls_port_groups, lflows, lflow_ref);
 
@@ -6751,6 +6780,14 @@ build_ls_stateful_rec_pre_lb(const struct 
ls_stateful_record *ls_stateful_rec,
         }
     }
 
+    /* EVPN remote VTEPs do not have conntrack zones, so their traffic
+     * must always skip conntrack regardless of whether LB VIPs are
+     * configured.  This differs from localnet ports which DO have
+     * conntrack zones and can participate in load balancing. */
+    skip_evpn_from_conntrack(od, ls_stateful_rec->has_stateful_acl,
+                             S_SWITCH_IN_PRE_LB, S_SWITCH_OUT_PRE_LB,
+                             110, lflows, lflow_ref);
+
     /* 'REGBIT_CONNTRACK_NAT' is set to let the pre-stateful table send
      * packet to conntrack for defragmentation and possibly for unNATting.
      *
diff --git a/tests/multinode.at b/tests/multinode.at
index c2587b68ae..d07660797c 100644
--- a/tests/multinode.at
+++ b/tests/multinode.at
@@ -3829,6 +3829,36 @@ OVS_WAIT_UNTIL([m_as ovn-gw-1 ip netns exec 
fabric_workload ping -6 -W 1 -c 1 10
 OVS_WAIT_UNTIL([m_as ovn-gw-2 ip netns exec fabric_workload ping    -W 1 -c 1 
10.0.0.12])
 OVS_WAIT_UNTIL([m_as ovn-gw-2 ip netns exec fabric_workload ping -6 -W 1 -c 1 
10::12])
 
+AS_BOX([Check EVPN traffic with stateful ACLs])
+dnl Adding a stateful ACL should not break traffic from/to remote VTEPs.
+dnl Without the conntrack skip flows (from_evpn_vtep / to_evpn_vtep),
+dnl conntrack would return +trk+inv for VXLAN traffic and drop it.
+check multinode_nbctl --wait=hv \
+    -- acl-add ls from-lport 100 "ip" allow-related \
+    -- acl-add ls to-lport 100 "ip" allow-related
+
+dnl Verify fabric-to-workload pings still work with stateful ACL.
+OVS_WAIT_UNTIL([m_as ovn-gw-1 ip netns exec fabric_workload ping    -W 1 -c 1 
10.0.0.11])
+OVS_WAIT_UNTIL([m_as ovn-gw-1 ip netns exec fabric_workload ping -6 -W 1 -c 1 
10::11])
+OVS_WAIT_UNTIL([m_as ovn-gw-2 ip netns exec fabric_workload ping    -W 1 -c 1 
10.0.0.12])
+OVS_WAIT_UNTIL([m_as ovn-gw-2 ip netns exec fabric_workload ping -6 -W 1 -c 1 
10::12])
+
+dnl Also add a load balancer and verify pings still work.
+check multinode_nbctl --wait=hv \
+    -- lb-add lb1 10.0.0.100:80 10.0.0.11:80 \
+    -- ls-lb-add ls lb1
+
+OVS_WAIT_UNTIL([m_as ovn-gw-1 ip netns exec fabric_workload ping    -W 1 -c 1 
10.0.0.11])
+OVS_WAIT_UNTIL([m_as ovn-gw-1 ip netns exec fabric_workload ping -6 -W 1 -c 1 
10::11])
+OVS_WAIT_UNTIL([m_as ovn-gw-2 ip netns exec fabric_workload ping    -W 1 -c 1 
10.0.0.12])
+OVS_WAIT_UNTIL([m_as ovn-gw-2 ip netns exec fabric_workload ping -6 -W 1 -c 1 
10::12])
+
+dnl Cleanup ACL and LB.
+check multinode_nbctl --wait=hv \
+    -- acl-del ls \
+    -- ls-lb-del ls lb1 \
+    -- lb-del lb1
+
 AS_BOX([Check type-2 MAC+IP EVPN route advertisements])
 # Ping from the frr-ns to the fabric workload so that its IP is learned on
 # the fabric EVPN peer (and advertised to OVN).
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 796c30daf7..1d7bd6c288 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -19026,6 +19026,99 @@ OVN_CLEANUP_NORTHD
 AT_CLEANUP
 ])
 
+OVN_FOR_EACH_NORTHD_NO_HV([
+AT_SETUP([LS EVPN conntrack skip with stateful ACLs and LBs])
+AT_KEYWORDS([dynamic-routing])
+ovn_start
+
+AS_BOX([EVPN switch, no ACLs or LBs])
+check ovn-nbctl --wait=sb \
+    -- ls-add ls-evpn \
+    -- set logical_switch ls-evpn other_config:dynamic-routing-vni=10 \
+    -- lsp-add ls-evpn lsp0 \
+    -- lsp-set-addresses lsp0 "00:00:00:00:00:01 10.0.0.1"
+
+ovn-sbctl dump-flows ls-evpn > lflows
+
+dnl No stateful ACL, so no EVPN skip flows in pre_acl.
+AT_CHECK([grep 'pre_acl' lflows | grep 'from_evpn_vtep'], [1])
+AT_CHECK([grep 'pre_acl' lflows | grep 'to_evpn_vtep'], [1])
+
+dnl pre_lb EVPN skip flows are always present for EVPN switches.
+AT_CHECK([grep 'pre_lb' lflows | grep 'from_evpn_vtep\|to_evpn_vtep' | 
ovn_strip_lflows], [0], [dnl
+  table=??(ls_in_pre_lb       ), priority=110  , match=(from_evpn_vtep), 
action=(next;)
+  table=??(ls_out_pre_lb      ), priority=110  , match=(to_evpn_vtep), 
action=(flags.pkt_sampled = 0; ct_clear; next;)
+])
+
+AS_BOX([EVPN switch + stateful ACL])
+check ovn-nbctl --wait=sb acl-add ls-evpn from-lport 100 "ip" allow-related
+
+ovn-sbctl dump-flows ls-evpn > lflows
+
+dnl Stateful ACL present, so EVPN skip flows appear in pre_acl.
+AT_CHECK([grep 'pre_acl' lflows | grep 'from_evpn_vtep\|to_evpn_vtep' | 
ovn_strip_lflows], [0], [dnl
+  table=??(ls_in_pre_acl      ), priority=110  , match=(from_evpn_vtep), 
action=(next;)
+  table=??(ls_out_pre_acl     ), priority=110  , match=(to_evpn_vtep), 
action=(next;)
+])
+
+dnl pre_lb EVPN skip flows with next; action (has_stateful_acl is true).
+AT_CHECK([grep 'pre_lb' lflows | grep 'from_evpn_vtep\|to_evpn_vtep' | 
ovn_strip_lflows], [0], [dnl
+  table=??(ls_in_pre_lb       ), priority=110  , match=(from_evpn_vtep), 
action=(next;)
+  table=??(ls_out_pre_lb      ), priority=110  , match=(to_evpn_vtep), 
action=(next;)
+])
+
+AS_BOX([EVPN switch + LB only])
+check ovn-nbctl --wait=sb \
+    -- acl-del ls-evpn \
+    -- lb-add lb1 10.0.0.100:80 10.0.0.1:80 \
+    -- ls-lb-add ls-evpn lb1
+
+ovn-sbctl dump-flows ls-evpn > lflows
+
+dnl No stateful ACL, so no EVPN skip flows in pre_acl.
+AT_CHECK([grep 'pre_acl' lflows | grep 'from_evpn_vtep'], [1])
+AT_CHECK([grep 'pre_acl' lflows | grep 'to_evpn_vtep'], [1])
+
+dnl pre_lb EVPN skip flows with ct_clear egress (no stateful ACL).
+AT_CHECK([grep 'pre_lb' lflows | grep 'from_evpn_vtep\|to_evpn_vtep' | 
ovn_strip_lflows], [0], [dnl
+  table=??(ls_in_pre_lb       ), priority=110  , match=(from_evpn_vtep), 
action=(next;)
+  table=??(ls_out_pre_lb      ), priority=110  , match=(to_evpn_vtep), 
action=(flags.pkt_sampled = 0; ct_clear; next;)
+])
+
+AS_BOX([EVPN switch + ACL + LB])
+check ovn-nbctl --wait=sb acl-add ls-evpn from-lport 100 "ip" allow-related
+
+ovn-sbctl dump-flows ls-evpn > lflows
+
+dnl Stateful ACL present again, so EVPN skip flows appear in pre_acl.
+AT_CHECK([grep 'pre_acl' lflows | grep 'from_evpn_vtep\|to_evpn_vtep' | 
ovn_strip_lflows], [0], [dnl
+  table=??(ls_in_pre_acl      ), priority=110  , match=(from_evpn_vtep), 
action=(next;)
+  table=??(ls_out_pre_acl     ), priority=110  , match=(to_evpn_vtep), 
action=(next;)
+])
+
+dnl pre_lb egress action is next; because has_stateful_acl is true.
+AT_CHECK([grep 'pre_lb' lflows | grep 'from_evpn_vtep\|to_evpn_vtep' | 
ovn_strip_lflows], [0], [dnl
+  table=??(ls_in_pre_lb       ), priority=110  , match=(from_evpn_vtep), 
action=(next;)
+  table=??(ls_out_pre_lb      ), priority=110  , match=(to_evpn_vtep), 
action=(next;)
+])
+
+AS_BOX([Non-EVPN switch + ACL])
+check ovn-nbctl --wait=sb \
+    -- ls-add ls-plain \
+    -- lsp-add ls-plain lsp1 \
+    -- lsp-set-addresses lsp1 "00:00:00:00:00:02 10.0.0.2" \
+    -- acl-add ls-plain from-lport 100 "ip" allow-related
+
+ovn-sbctl dump-flows ls-plain > lflows
+
+dnl Non-EVPN switch must not have any EVPN skip flows.
+AT_CHECK([grep 'from_evpn_vtep' lflows], [1])
+AT_CHECK([grep 'to_evpn_vtep' lflows], [1])
+
+OVN_CLEANUP_NORTHD
+AT_CLEANUP
+])
+
 OVN_FOR_EACH_NORTHD_NO_HV([
 AT_SETUP([Check network function])
 ovn_start
diff --git a/tests/ovn.at b/tests/ovn.at
index cec3bb9a73..939dffc761 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -108,8 +108,11 @@ dnl
 dnl When we add or remove registers this test needs to be updated, of course.
 AT_SETUP([registers])
 AT_CHECK([ovstest test-ovn dump-symtab | grep reg | sort], [0],
-[[reg0 = xxreg0[96..127]
+[[from_evpn_vtep = reg14 == 0x80000000/0x80000000
+reg0 = xxreg0[96..127]
 reg1 = xxreg0[64..95]
+reg14 = NXM_NX_REG14
+reg15 = NXM_NX_REG15
 reg2 = xxreg0[32..63]
 reg3 = xxreg0[0..31]
 reg4 = xxreg1[96..127]
@@ -118,6 +121,7 @@ reg6 = xxreg1[32..63]
 reg7 = xxreg1[0..31]
 reg8 = xreg4[32..63]
 reg9 = xreg4[0..31]
+to_evpn_vtep = reg15 == 0x80000000/0x80000000
 xreg0 = xxreg0[64..127]
 xreg1 = xxreg0[0..63]
 xreg2 = xxreg1[64..127]
@@ -128,6 +132,14 @@ xxreg1 = NXM_NX_XXREG1
 ]])
 AT_CLEANUP
 
+dnl Check EVPN VTEP predicate definitions.
+AT_SETUP([EVPN VTEP fields])
+AT_CHECK([ovstest test-ovn dump-symtab | grep evpn_vtep | sort], [0],
+[[from_evpn_vtep = reg14 == 0x80000000/0x80000000
+to_evpn_vtep = reg15 == 0x80000000/0x80000000
+]])
+AT_CLEANUP
+
 dnl Check that the OVN conntrack field definitions are correct.
 AT_SETUP([conntrack fields])
 AT_CHECK([ovstest test-ovn dump-symtab | grep ^ct | sort], [0],
@@ -2108,7 +2120,7 @@ reg0[[1..3]] = get_fdb(eth.src);
     Cannot use 3-bit field reg0[[1..3]] where 32-bit field is required.
 
 reg15 = get_fdb(eth.dst);
-    Syntax error at `reg15' expecting field name.
+    encodes as set_field:0->reg15,resubmit(,OFTABLE_GET_FDB)
 
 outport = get_fdb(ip4.dst);
     Cannot use 32-bit field ip4.dst[[0..31]] where 48-bit field is required.
-- 
2.53.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to