When ACL tiers were introduced, the code kept track of the highest ACL
tier so that when iterating through ACL tiers, we would not attempt to
advance the current tier past the highest configured tier.

Unfortunately, keeping track of a single max ACL tier doesn't work when
ACLs are evaluated in three separate places. ACLs can be evaluated on
ingress before load balancing, on ingress after load balancing, and on
egress. By only keeping track of a single max ACL tier, it means that we
could perform superfluous checks if one stage of ACLs has a higher max
tier than other stages. As an example, if ingress pre-load balancing
ACLs have a maximum tier of 1, and egress ACLs have a maximum tier of 2,
then it means that for all stages of ACLs, we will evaluate tiers 0, 1,
and 2 of ACLs, even though only one stage of ACLs uses tier 2.

>From a pure functionality standpoint, this doesn't cause any issues.
Even if we advance the tier past the highest configured value, it
results in a no-op and the same net result happens.

However, the addition of sampling into ACLs has caused an unwanted side
effect. In the example scenario above, let's say the tier 1 ACL in the
ingress pre-load balancing stage evaluates to "pass". After the
evaluation, we send a sample for the "pass" result. We then advance
the tier to 2, then move back to ACL evaluation. There are no tier 2
ACLs, so we move on to the sampling stage again. We then send a second
sample for the previous "pass" result from tier 1. The result is
confusing since we've sent two samples for the same ACL evaluation.

To remedy this, we now track the max ACL tier in each of the stages
where ACLs are evaluated. Now there are no superfluous ACL evaluations
and no superfluous samples sent either.

Reported-at: https://issues.redhat.com/browse/FDP-760
Signed-off-by: Mark Michelson <[email protected]>
---
v2 -> v3:
 * Instead of adding to an additional system test, this patch now
   creates a new system test for ensuring the mismatched tiers do
   not result in extra samples being sent. This makes the test pass
   consistently.

v1 -> v2:
 * Add a comment to the memcmp() call to caution in case the
   acl_tier struct is altered and has padding added.
 * Switch to using MAX macro when assigning max ACL tiers.
 * Add a system test to ensure the referenced issue is fixed. For the
   record, this test fails on the main branch, but passes with this
   patch.
---
 northd/en-ls-stateful.c |  54 ++++++++++++++---
 northd/en-ls-stateful.h |   8 ++-
 northd/northd.c         |  47 +++++++++++----
 tests/ovn-northd.at     |  34 +++++++++++
 tests/system-ovn.at     | 124 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 247 insertions(+), 20 deletions(-)

diff --git a/northd/en-ls-stateful.c b/northd/en-ls-stateful.c
index 44f74ea08..11e97aa9b 100644
--- a/northd/en-ls-stateful.c
+++ b/northd/en-ls-stateful.c
@@ -204,16 +204,22 @@ ls_stateful_port_group_handler(struct engine_node *node, 
void *data_)
             ovn_datapaths_find_by_index(input_data.ls_datapaths,
                                         ls_stateful_rec->ls_index);
         bool had_stateful_acl = ls_stateful_rec->has_stateful_acl;
-        uint64_t max_acl_tier = ls_stateful_rec->max_acl_tier;
+        struct acl_tier old_max = ls_stateful_rec->max_acl_tier;
         bool had_acls = ls_stateful_rec->has_acls;
         bool modified = false;
 
         ls_stateful_record_reinit(ls_stateful_rec, od, ls_pg,
                                   input_data.ls_port_groups);
 
+        struct acl_tier new_max = ls_stateful_rec->max_acl_tier;
+
+        /* Using memcmp for struct acl_tier is fine since there is no padding
+         * in the struct. However, if the structure is changed, the memcmp
+         * may need to be updated to compare individual struct fields.
+         */
         if ((had_stateful_acl != ls_stateful_rec->has_stateful_acl)
             || (had_acls != ls_stateful_rec->has_acls)
-            || max_acl_tier != ls_stateful_rec->max_acl_tier) {
+            || memcmp(&old_max, &new_max, sizeof(old_max))) {
             modified = true;
         }
 
@@ -365,7 +371,8 @@ ls_stateful_record_set_acl_flags(struct ls_stateful_record 
*ls_stateful_rec,
                                  const struct ls_port_group_table *ls_pgs)
 {
     ls_stateful_rec->has_stateful_acl = false;
-    ls_stateful_rec->max_acl_tier = 0;
+    memset(&ls_stateful_rec->max_acl_tier, 0,
+           sizeof ls_stateful_rec->max_acl_tier);
     ls_stateful_rec->has_acls = false;
 
     if (ls_stateful_record_set_acl_flags_(ls_stateful_rec, od->nbs->acls,
@@ -391,6 +398,38 @@ ls_stateful_record_set_acl_flags(struct ls_stateful_record 
*ls_stateful_rec,
     }
 }
 
+static void
+update_ls_max_acl_tier(struct ls_stateful_record *ls_stateful_rec,
+                       const struct nbrec_acl *acl)
+{
+    if (!acl->tier) {
+        return;
+    }
+
+    uint64_t *tier;
+
+    if (!strcmp(acl->direction, "from-lport")) {
+        if (smap_get_bool(&acl->options, "apply-after-lb", false)) {
+            tier = &ls_stateful_rec->max_acl_tier.ingress_post_lb;
+        } else {
+            tier = &ls_stateful_rec->max_acl_tier.ingress_pre_lb;
+        }
+    } else {
+        tier = &ls_stateful_rec->max_acl_tier.egress;
+    }
+
+    *tier = MAX(*tier, acl->tier);
+}
+
+static bool
+ls_acl_tiers_are_maxed_out(struct acl_tier *acl_tier,
+                           uint64_t max_allowed_acl_tier)
+{
+    return acl_tier->ingress_post_lb == max_allowed_acl_tier &&
+           acl_tier->ingress_pre_lb == max_allowed_acl_tier &&
+           acl_tier->egress == max_allowed_acl_tier;
+}
+
 static bool
 ls_stateful_record_set_acl_flags_(struct ls_stateful_record *ls_stateful_rec,
                                   struct nbrec_acl **acls,
@@ -408,16 +447,15 @@ ls_stateful_record_set_acl_flags_(struct 
ls_stateful_record *ls_stateful_rec,
     ls_stateful_rec->has_acls = true;
     for (size_t i = 0; i < n_acls; i++) {
         const struct nbrec_acl *acl = acls[i];
-        if (acl->tier > ls_stateful_rec->max_acl_tier) {
-            ls_stateful_rec->max_acl_tier = acl->tier;
-        }
+        update_ls_max_acl_tier(ls_stateful_rec, acl);
         if (!ls_stateful_rec->has_stateful_acl
                 && !strcmp(acl->action, "allow-related")) {
             ls_stateful_rec->has_stateful_acl = true;
         }
         if (ls_stateful_rec->has_stateful_acl &&
-            ls_stateful_rec->max_acl_tier ==
-                nbrec_acl_col_tier.type.value.integer.max) {
+            ls_acl_tiers_are_maxed_out(
+                &ls_stateful_rec->max_acl_tier,
+                nbrec_acl_col_tier.type.value.integer.max)) {
             return true;
         }
     }
diff --git a/northd/en-ls-stateful.h b/northd/en-ls-stateful.h
index eae4b08e2..18a7398a6 100644
--- a/northd/en-ls-stateful.h
+++ b/northd/en-ls-stateful.h
@@ -33,6 +33,12 @@
 
 struct lflow_ref;
 
+struct acl_tier {
+    uint64_t ingress_pre_lb;
+    uint64_t ingress_post_lb;
+    uint64_t egress;
+};
+
 struct ls_stateful_record {
     struct hmap_node key_node;
 
@@ -46,7 +52,7 @@ struct ls_stateful_record {
     bool has_stateful_acl;
     bool has_lb_vip;
     bool has_acls;
-    uint64_t max_acl_tier;
+    struct acl_tier max_acl_tier;
 
     /* 'lflow_ref' is used to reference logical flows generated for
      * this ls_stateful record.
diff --git a/northd/northd.c b/northd/northd.c
index 3a488ff3d..58ffeea92 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -7338,28 +7338,34 @@ build_acl_action_lflows(const struct ls_stateful_record 
*ls_stateful_rec,
         S_SWITCH_OUT_ACL_EVAL,
     };
 
+    uint64_t max_acl_tiers[] = {
+        ls_stateful_rec->max_acl_tier.ingress_pre_lb,
+        ls_stateful_rec->max_acl_tier.ingress_post_lb,
+        ls_stateful_rec->max_acl_tier.egress,
+    };
+
     ds_clear(actions);
     ds_put_cstr(actions, REGBIT_ACL_VERDICT_ALLOW " = 0; "
                         REGBIT_ACL_VERDICT_DROP " = 0; "
                         REGBIT_ACL_VERDICT_REJECT " = 0; ");
-    if (ls_stateful_rec->max_acl_tier) {
-        ds_put_cstr(actions, REG_ACL_TIER " = 0; ");
-    }
 
     size_t verdict_len = actions->length;
-
     for (size_t i = 0; i < ARRAY_SIZE(stages); i++) {
         enum ovn_stage stage = stages[i];
+        if (max_acl_tiers[i]) {
+            ds_put_cstr(actions, REG_ACL_TIER " = 0; ");
+        }
+        size_t verdict_tier_len = actions->length;
         if (!ls_stateful_rec->has_acls) {
             ovn_lflow_add(lflows, od, stage, 0, "1", "next;", lflow_ref);
             continue;
         }
-        ds_truncate(actions, verdict_len);
+        ds_truncate(actions, verdict_tier_len);
         ds_put_cstr(actions, "next;");
         ovn_lflow_add(lflows, od, stage, 1000,
                       REGBIT_ACL_VERDICT_ALLOW " == 1", ds_cstr(actions),
                       lflow_ref);
-        ds_truncate(actions, verdict_len);
+        ds_truncate(actions, verdict_tier_len);
         ds_put_cstr(actions, debug_implicit_drop_action());
         ovn_lflow_add(lflows, od, stage, 1000,
                       REGBIT_ACL_VERDICT_DROP " == 1",
@@ -7367,7 +7373,7 @@ build_acl_action_lflows(const struct ls_stateful_record 
*ls_stateful_rec,
                       lflow_ref);
         bool ingress = ovn_stage_get_pipeline(stage) == P_IN;
 
-        ds_truncate(actions, verdict_len);
+        ds_truncate(actions, verdict_tier_len);
         build_acl_reject_action(actions, ingress);
 
         ovn_lflow_metered(lflows, od, stage, 1000,
@@ -7375,12 +7381,12 @@ build_acl_action_lflows(const struct ls_stateful_record 
*ls_stateful_rec,
                           copp_meter_get(COPP_REJECT, od->nbs->copp,
                           meter_groups), lflow_ref);
 
-        ds_truncate(actions, verdict_len);
+        ds_truncate(actions, verdict_tier_len);
         ds_put_cstr(actions, default_acl_action);
         ovn_lflow_add(lflows, od, stage, 0, "1", ds_cstr(actions), lflow_ref);
 
         struct ds tier_actions = DS_EMPTY_INITIALIZER;
-        for (size_t j = 0; j < ls_stateful_rec->max_acl_tier; j++) {
+        for (size_t j = 0; j < max_acl_tiers[i]; j++) {
             ds_clear(match);
             ds_put_format(match, REG_ACL_TIER " == %"PRIuSIZE, j);
             ds_clear(&tier_actions);
@@ -7392,6 +7398,7 @@ build_acl_action_lflows(const struct ls_stateful_record 
*ls_stateful_rec,
                          ds_cstr(&tier_actions), lflow_ref);
         }
         ds_destroy(&tier_actions);
+        ds_truncate(actions, verdict_len);
     }
 }
 
@@ -7460,6 +7467,21 @@ build_acl_log_related_flows(const struct ovn_datapath 
*od,
                             &acl->header_, lflow_ref);
 }
 
+static uint64_t
+choose_max_acl_tier(const struct ls_stateful_record *ls_stateful_rec,
+                    const struct nbrec_acl *acl)
+{
+    if (!strcmp(acl->direction, "from-lport")) {
+        if (smap_get_bool(&acl->options, "apply-after-lb", false)) {
+            return ls_stateful_rec->max_acl_tier.ingress_post_lb;
+        } else {
+            return ls_stateful_rec->max_acl_tier.ingress_pre_lb;
+        }
+    } else {
+        return ls_stateful_rec->max_acl_tier.egress;
+    }
+}
+
 static void
 build_acls(const struct ls_stateful_record *ls_stateful_rec,
            const struct ovn_datapath *od,
@@ -7656,8 +7678,9 @@ build_acls(const struct ls_stateful_record 
*ls_stateful_rec,
         build_acl_log_related_flows(od, lflows, acl, has_stateful,
                                     meter_groups, &match, &actions,
                                     lflow_ref);
+        uint64_t max_acl_tier = choose_max_acl_tier(ls_stateful_rec, acl);
         consider_acl(lflows, od, acl, has_stateful,
-                     meter_groups, ls_stateful_rec->max_acl_tier,
+                     meter_groups, max_acl_tier,
                      &match, &actions, lflow_ref);
         build_acl_sample_flows(ls_stateful_rec, od, lflows, acl,
                                &match, &actions, sampling_apps,
@@ -7675,8 +7698,10 @@ build_acls(const struct ls_stateful_record 
*ls_stateful_rec,
                 build_acl_log_related_flows(od, lflows, acl, has_stateful,
                                             meter_groups, &match, &actions,
                                             lflow_ref);
+                uint64_t max_acl_tier = choose_max_acl_tier(ls_stateful_rec,
+                                                            acl);
                 consider_acl(lflows, od, acl, has_stateful,
-                             meter_groups, ls_stateful_rec->max_acl_tier,
+                             meter_groups, max_acl_tier,
                              &match, &actions, lflow_ref);
                 build_acl_sample_flows(ls_stateful_rec, od, lflows, acl,
                                        &match, &actions, sampling_apps,
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 4eae1c67c..2013cc513 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -14253,3 +14253,37 @@ AT_CHECK([grep "lr_in_dnat" lr1flows | 
ovn_strip_lflows | grep "30.0.0.1"], [0],
 
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD_NO_HV([
+AT_SETUP([ACL mismatched tiers])
+ovn_start
+
+check ovn-nbctl ls-add S1
+
+# Ingress pre-lb ACLs have only a tier 1 ACL configured.
+# Ingress post-lb ACLs have tier up to 3 configured.
+# Egress ACLs have up to tier 2 configured.
+check ovn-nbctl --tier=1 acl-add S1 from-lport 1001 "tcp" allow
+check ovn-nbctl --tier=3 --apply-after-lb acl-add  S1 from-lport 1001 "tcp" 
allow
+check ovn-nbctl --tier=2 acl-add S1 to-lport 1001 "tcp" allow
+
+# Ingress pre-lb ACLs should only ever increment the tier to 1.
+AT_CHECK([ovn-sbctl lflow-list S1 | grep ls_in_acl_action | grep priority=500 
| ovn_strip_lflows], [0], [dnl
+  table=??(ls_in_acl_action   ), priority=500  , match=(reg8[[30..31]] == 0), 
action=(reg8[[30..31]] = 1; next(pipeline=ingress,table=??);)
+])
+
+# Ingress post-lb ACLs should increment the tier to 3.
+AT_CHECK([ovn-sbctl lflow-list S1 | grep ls_in_acl_after_lb_action | grep 
priority=500 | ovn_strip_lflows], [0], [dnl
+  table=??(ls_in_acl_after_lb_action), priority=500  , match=(reg8[[30..31]] 
== 0), action=(reg8[[30..31]] = 1; next(pipeline=ingress,table=??);)
+  table=??(ls_in_acl_after_lb_action), priority=500  , match=(reg8[[30..31]] 
== 1), action=(reg8[[30..31]] = 2; next(pipeline=ingress,table=??);)
+  table=??(ls_in_acl_after_lb_action), priority=500  , match=(reg8[[30..31]] 
== 2), action=(reg8[[30..31]] = 3; next(pipeline=ingress,table=??);)
+])
+
+# Egress ACLs should increment the tier to 2.
+AT_CHECK([ovn-sbctl lflow-list S1 | grep ls_out_acl_action | grep priority=500 
| ovn_strip_lflows], [0], [dnl
+  table=??(ls_out_acl_action  ), priority=500  , match=(reg8[[30..31]] == 0), 
action=(reg8[[30..31]] = 1; next(pipeline=egress,table=??);)
+  table=??(ls_out_acl_action  ), priority=500  , match=(reg8[[30..31]] == 1), 
action=(reg8[[30..31]] = 2; next(pipeline=egress,table=??);)
+])
+
+AT_CLEANUP
+])
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 4452d5676..e5b1fd43c 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -13430,6 +13430,130 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port 
patch-.*/d
 AT_CLEANUP
 ])
 
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ovn -- Tiered ACL Sampling - tier mismatch])
+AT_SKIP_IF([test $HAVE_NFCAPD = no])
+AT_SKIP_IF([test $HAVE_NFDUMP = no])
+AT_KEYWORDS([ACL])
+
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-int])
+
+dnl Set external-ids in br-int needed for ovn-controller
+check ovs-vsctl \
+        -- set Open_vSwitch . external-ids:system-id=hv1 \
+        -- set Open_vSwitch . 
external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+dnl Start ovn-controller
+start_daemon ovn-controller
+
+dnl Logical network:
+dnl 1 logical switch
+dnl 2 VIFs
+
+check ovn-nbctl                                                  \
+    -- ls-add ls                                                 \
+    -- lsp-add ls vm1 -- lsp-set-addresses vm1 00:00:00:00:00:01 \
+    -- lsp-add ls vm2 -- lsp-set-addresses vm2 00:00:00:00:00:02
+ADD_NAMESPACES(vm1)
+ADD_VETH(vm1, vm1, br-int, "42.42.42.2/24", "00:00:00:00:00:01", "42.42.42.1")
+
+ADD_NAMESPACES(vm2)
+ADD_VETH(vm2, vm2, br-int, "42.42.42.3/24", "00:00:00:00:00:02", "42.42.42.1")
+
+collector1=$(ovn-nbctl create Sample_Collector id=1 name=c1 probability=65535 
set_id=100)
+check_row_count nb:Sample_Collector 1
+
+ovn-nbctl create Sampling_App type="acl-new" id="42"
+ovn-nbctl create Sampling_App type="acl-est" id="43"
+check_row_count nb:Sampling_App 2
+
+dnl Create two tiers of ACLs.
+dnl The first ACL is an ingress "pass" ACL at tier 0.
+ovn-nbctl                                                                     \
+    -- --id=@sample_1_new create Sample collector="$collector1" metadata=1001 \
+    -- --id=@sample_1_est create Sample collector="$collector1" metadata=1002 \
+    -- --tier=0 --sample-new=@sample_1_new --sample-est=@sample_1_est         \
+       acl-add ls from-lport 1 "inport == \"vm1\" && udp.dst == 1000"         \
+       pass
+
+dnl The second ACL is an unrelated egress ACL. However, it uses tier 1 instead
+dnl of tier 0.
+ovn-nbctl --tier=1 acl-add ls to-lport 1 "inport == \"vm1\" && udp.dst == 
1000" allow-related
+
+check_row_count nb:ACL 2
+check_row_count nb:Sample 2
+
+dnl Wait for ovn-controller to catch up.
+wait_for_ports_up
+check ovn-nbctl --wait=hv sync
+
+dnl Start an IPFIX collector.
+DAEMONIZE([nfcapd -B 1024000 -w . -p 4242 2> collector.err], [collector.pid])
+
+dnl Wait for the collector to be up.
+OVS_WAIT_UNTIL([grep -q 'Startup nfcapd.' collector.err])
+
+dnl Configure the OVS flow sample collector.
+ovs-vsctl --id=@br get Bridge br-int \
+    -- --id=@ipfix create IPFIX targets=\"127.0.0.1:4242\" template_interval=1 
\
+    -- --id=@cs create Flow_Sample_Collector_Set id=100 bridge=@br ipfix=@ipfix
+
+check ovn-nbctl --wait=hv sync
+dnl And wait for it to be up and running.
+OVS_WAIT_UNTIL([ovs-ofctl dump-ipfix-flow br-int | grep -q '1 ids'])
+
+dnl Start UDP echo server on vm2.
+NETNS_DAEMONIZE([vm2], [nc -e /bin/cat -k -u -v -l -m 1 1000], 
[nc-vm2-1000.pid])
+
+dnl Send traffic to the UDP server (hits both ACL tiers).
+NS_CHECK_EXEC([vm1], [echo a | nc --send-only -u 42.42.42.3 1000])
+
+dnl Wait until OVS sampled all expected packets:
+dnl In this case, we only expect a single sampled packet.
+dnl The pass ACL should sample its "pass" result. The egress
+dnl ACL should not get hit (and it doesn't have sampling configured
+dnl anyway). A bug that previously existed in OVN would result in
+dnl the "pass" being sampled two times instead of just once.
+OVS_WAIT_UNTIL([ovs-ofctl dump-ipfix-flow br-int | grep -q 'sampled pkts=1'])
+
+dnl Check the IPFIX samples.
+kill $(cat collector.pid)
+OVS_WAIT_WHILE([kill -0 $(cat collector.pid) 2>/dev/null])
+
+dnl Can't match on observation domain ID due to the followig fix not being
+dnl available in any released version of nfdump:
+dnl https://github.com/phaag/nfdump/issues/544
+dnl
+dnl Only match on the point ID.
+AT_CHECK([for f in $(ls -1 nfcapd.*); do nfdump -o json -r $f; done | grep 
observationPointID | awk '{$1=$1;print}' | sort], [0], [dnl
+"observationPointID" : 1001,
+])
+
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([ovn-northd])
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
+/connection dropped.*/d"])
+
+AT_CLEANUP
+])
+
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([ovn -- ACL Sampling - Stateful ACL - to-lport router port])
 AT_SKIP_IF([test $HAVE_NFCAPD = no])
-- 
2.45.2

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to