It is observed in some environments that there are much more ukeys than
actual DP flows. For example:

$ ovs-appctl upcall/show
system@ovs-system:
flows : (current 7) (avg 6) (max 117) (limit 2125)
offloaded flows : 525
dump duration : 1063ms
ufid enabled : true

23: (keys 3612)
24: (keys 3625)
25: (keys 3485)

The revalidator threads are busy revalidating the stale ukeys leading to
high CPU and long dump duration.

This patch records the last dump timestamp. If the flow was not dumped
for at least twice the idle time, we can assume the datapath flow now
longer exists and the ukey should be deleted.

Reported-by: Roi Dayan <r...@nvidia.com>
Co-authored-by: Han Zhou <hz...@ovn.org>
Co-authored-by: Roi Dayan <r...@nvidia.com>
Signed-off-by: Han Zhou <hz...@ovn.org>
Signed-off-by: Roi Dayan <r...@nvidia.com>
Signed-off-by: Eelco Chaudron <echau...@redhat.com>
---

v3: Rewrote fix to use actual dump state, and added a tests case.
---
 ofproto/ofproto-dpif-upcall.c                | 14 ++++-
 tests/system-offloads-traffic.at             | 64 ++++++++++++++++++++
 utilities/usdt-scripts/flow_reval_monitor.py |  2 +
 3 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 4d39bc5a7..5c130b694 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -57,6 +57,7 @@ COVERAGE_DEFINE(dumped_inconsistent_flow);
 COVERAGE_DEFINE(dumped_new_flow);
 COVERAGE_DEFINE(handler_duplicate_upcall);
 COVERAGE_DEFINE(revalidate_missed_dp_flow);
+COVERAGE_DEFINE(revalidate_missing_dp_flow);
 COVERAGE_DEFINE(ukey_dp_change);
 COVERAGE_DEFINE(ukey_invalid_stat_reset);
 COVERAGE_DEFINE(ukey_replace_contention);
@@ -278,6 +279,7 @@ enum flow_del_reason {
     FDR_BAD_ODP_FIT,        /* Bad ODP flow fit. */
     FDR_FLOW_IDLE,          /* Flow idle timeout. */
     FDR_FLOW_LIMIT,         /* Kill all flows condition reached. */
+    FDR_FLOW_MISSING_DP,    /* Flow is missing from the datapath. */
     FDR_FLOW_WILDCARDED,    /* Flow needs a narrower wildcard mask. */
     FDR_NO_OFPROTO,         /* Bridge not found. */
     FDR_PURGE,              /* User requested flow deletion. */
@@ -315,6 +317,7 @@ struct udpif_key {
     struct dpif_flow_stats stats OVS_GUARDED; /* Last known stats.*/
     const char *dp_layer OVS_GUARDED;         /* Last known dp_layer. */
     long long int created OVS_GUARDED;        /* Estimate of creation time. */
+    long long int last_dumped OVS_GUARDED;    /* Flow last dump time. */
     uint64_t dump_seq OVS_GUARDED;            /* Tracks udpif->dump_seq. */
     uint64_t reval_seq OVS_GUARDED;           /* Tracks udpif->reval_seq. */
     enum ukey_state state OVS_GUARDED;        /* Tracks ukey lifetime. */
@@ -1825,6 +1828,7 @@ ukey_create__(const struct nlattr *key, size_t key_len,
     ukey->state_thread = ovsthread_id_self();
     ukey->state_where = OVS_SOURCE_LOCATOR;
     ukey->created = ukey->flow_time = time_msec();
+    ukey->last_dumped = 0;
     memset(&ukey->stats, 0, sizeof ukey->stats);
     ukey->stats.used = used;
     ukey->dp_layer = NULL;
@@ -2456,7 +2460,14 @@ revalidate_ukey(struct udpif *udpif, struct udpif_key 
*ukey,
         log_unexpected_stats_jump(ukey, stats);
     }
 
-    if (need_revalidate) {
+    if ((ukey->last_dumped ? ukey->last_dumped : ukey->created)
+        < udpif->dpif->current_ms - (2 * ofproto_max_idle)) {
+        /* If the flow was not dumped for at least twice the idle time,
+         * we can assume the datapath flow now longer exists and the ukey
+         * should be deleted. */
+        COVERAGE_INC(revalidate_missing_dp_flow);
+        *del_reason = FDR_FLOW_MISSING_DP;
+    } else if (need_revalidate) {
         if (should_revalidate(udpif, ukey, push.n_packets)) {
             if (!ukey->xcache) {
                 ukey->xcache = xlate_cache_new();
@@ -2890,6 +2901,7 @@ revalidate(struct revalidator *revalidator)
                 continue;
             }
 
+            ukey->last_dumped = now;
             ukey->offloaded = f->attrs.offloaded;
             if (!ukey->dp_layer
                 || (!dpif_synced_dp_layers(udpif->dpif)
diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at
index d1da33d96..a9416cbce 100644
--- a/tests/system-offloads-traffic.at
+++ b/tests/system-offloads-traffic.at
@@ -93,6 +93,70 @@ AT_CHECK([ovs-appctl upcall/show | grep -E "offloaded flows 
: [[1-9]]"], [0], [i
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([offloads - Forced removed datapath entries])
+OVS_TRAFFIC_VSWITCHD_START([], [], [-- set Open_vSwitch . 
other_config:hw-offload=true])
+
+AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+AT_CHECK([ovs-appctl dpctl/dump-flows], [0], [ignore])
+
+AT_CHECK([ovs-appctl vlog/set ofproto_dpif_upcall:dbg])
+
+dnl Set idle timeout to 1 second.
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=1000])
+
+NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -W 2 10.1.1.2 | FORMAT_PING], 
[0], [dnl
+10 packets transmitted, 10 received, 0% packet loss, time 0ms
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep 
"eth_type(0x0800)" | DUMP_CLEAN_SORTED], [0], [dnl
+in_port(2),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, 
used:0.001s, actions:output
+in_port(3),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, 
used:0.001s, actions:output
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep -q "arp" ], [0])
+
+dnl Delete IPv4 entries, but keep the ARP ones.
+AT_CHECK([tc filter del dev ovs-p0 ingress protocol ip pref 2])
+AT_CHECK([tc filter del dev ovs-p1 ingress protocol ip pref 2])
+
+dnl Bring the remote ports down to avoid traffic.
+AT_CHECK([ip -n at_ns0 link set p0 down])
+AT_CHECK([ip -n at_ns1 link set p1 down])
+
+dnl Wait until the ARP flow has timed out.
+OVS_WAIT_UNTIL([test `ovs-appctl dpctl/dump-flows type=tc,offloaded | \
+                  grep "arp" | wc -l` -eq 0])
+
+dnl Set max-idle to 10 ms, so we are sure the max-idle * 2 has been reached.
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:max-idle=10])
+
+dnl Add and delete a port to force the revalidation.
+for i in `seq 3`; do
+  REV_RECONFIGURE=$(ovs-appctl coverage/read-counter rev_reconfigure)
+  AT_CHECK([ovs-vsctl add-port br0 ovs-p2 -- \
+              set interface ovs-p2 type=internal])
+  AT_CHECK([ovs-vsctl del-port ovs-p2 ])
+  OVS_WAIT_UNTIL([test `ovs-appctl coverage/read-counter rev_reconfigure` \
+                    -gt $REV_RECONFIGURE])
+done
+
+dnl Wait for another full round of revalidation.  After this all ukeys should
+dnl be gone.
+AT_CHECK([ovs-appctl revalidator/wait], [0])
+AT_CHECK([ovs-appctl revalidator/wait], [0])
+
+dnl Make sure no more ukeys exists.
+AT_CHECK([ovs-appctl upcall/show | grep '(keys' | awk '{print $3}' | \
+            grep -qv '0)'], [1])
+
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to flow_del (No such file or 
directory)/d"])
+AT_CLEANUP
+
 AT_SETUP([offloads - set ingress_policing_rate and ingress_policing_burst - 
offloads disabled])
 AT_KEYWORDS([ingress_policing])
 OVS_CHECK_TC_QDISC()
diff --git a/utilities/usdt-scripts/flow_reval_monitor.py 
b/utilities/usdt-scripts/flow_reval_monitor.py
index 28479a565..1de06c73b 100755
--- a/utilities/usdt-scripts/flow_reval_monitor.py
+++ b/utilities/usdt-scripts/flow_reval_monitor.py
@@ -249,6 +249,7 @@ FdrReasons = IntEnum(
         "FDR_BAD_ODP_FIT",
         "FDR_FLOW_IDLE",
         "FDR_FLOW_LIMIT",
+        "FDR_FLOW_MISSING_DP",
         "FDR_FLOW_WILDCARDED",
         "FDR_NO_OFPROTO",
         "FDR_PURGE",
@@ -265,6 +266,7 @@ FdrReasonStrings = {
     FdrReasons.FDR_BAD_ODP_FIT: "Bad ODP flow fit",
     FdrReasons.FDR_FLOW_IDLE: "Flow idle timeout",
     FdrReasons.FDR_FLOW_LIMIT: "Kill all flows condition reached",
+    FdrReasons.FDR_FLOW_MISSING_DP: "Flow is missing from the datapath",
     FdrReasons.FDR_FLOW_WILDCARDED: "Flow needs a narrower wildcard mask",
     FdrReasons.FDR_NO_OFPROTO: "Bridge not found",
     FdrReasons.FDR_PURGE: "User requested flow deletion",
-- 
2.45.2

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to