On 12/15/22 09:52, Eelco Chaudron wrote:
When a flow gets modified, i.e. the actions are changes, the tc layer will
remove, and re-add the flow. This is causing all the counters to be reset.

This patch will remember the previous tc counters and adjust any requests
for statistics. This is done in a similar way as the rte_flow implementation.

It also updates the check_pkt_len tc test to purge the flows, so we do
not use updated tc flows, with their counters.

Signed-off-by: Eelco Chaudron <[email protected]>
---
Please note that for now two copies of the test case exists, but I will clean
this up once this gets applied by submitting a new revision of the
'tests: Add system-traffic.at tests to check-offloads' series.

v2: Do not update the stats->used, as in terse dump they should be 0.

  lib/netdev-offload-tc.c          |   92 ++++++++++++++++++++++++++++++++------
  lib/tc.h                         |    1
  tests/system-offloads-traffic.at |   65 +++++++++++++++++++++++++--
  tests/system-traffic.at          |   64 ++++++++++++++++++++++++++
  4 files changed, 201 insertions(+), 21 deletions(-)

diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index ce7f8ad97..03c25fc38 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -97,6 +97,12 @@ static int netdev_tc_parse_nl_actions(struct netdev *netdev,
                                        bool *recirc_act, bool more_actions,
                                        struct tc_action **need_jump_update);
+static void parse_tc_flower_to_stats(struct tc_flower *flower,
+                                     struct dpif_flow_stats *stats);
+
+static int get_ufid_adjust_stats(const ovs_u128 *ufid,
+                                 struct dpif_flow_stats *stats);
+
  static bool
  is_internal_port(const char *type)
  {
@@ -193,6 +199,9 @@ static struct ovs_mutex ufid_lock = OVS_MUTEX_INITIALIZER;
   * @ufid: ufid assigned to the flow
   * @id: tc filter id (tcf_id)
   * @netdev: netdev associated with the tc rule
+ * @adjust_stats: When flow gets updated with new actions, we need to adjust
+ *                the reported stats to include previous values as the hardware
+ *                rule is removed and re-added. This stats copy is used for it.
   */
  struct ufid_tc_data {
      struct hmap_node ufid_to_tc_node;
@@ -200,6 +209,7 @@ struct ufid_tc_data {
      ovs_u128 ufid;
      struct tcf_id id;
      struct netdev *netdev;
+    struct dpif_flow_stats adjust_stats;
  };
static void
@@ -233,12 +243,36 @@ del_ufid_tc_mapping(const ovs_u128 *ufid)
      ovs_mutex_unlock(&ufid_lock);
  }
+static void
+netdev_tc_adjust_stats(struct dpif_flow_stats *stats,
+                       struct dpif_flow_stats *adjust_stats)
+{
+    /* Do not try to restore the stats->used, as in terse
+     * mode dumps we will always report them as 0. */
+    stats->n_bytes += adjust_stats->n_bytes;
+    stats->n_packets += adjust_stats->n_packets;
Why not also update the other members of dpif_flow_stats?
+}
+
  /* Wrapper function to delete filter and ufid tc mapping */
  static int
-del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid)
+del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid,
+                            struct dpif_flow_stats *stats)
  {
+    struct tc_flower flower;
      int err;
+ if (stats) {
+        memset(stats, 0, sizeof *stats);
+        if (!tc_get_flower(id, &flower)) {
+            struct dpif_flow_stats adjust_stats;
+
+            parse_tc_flower_to_stats(&flower, stats);
+            if (!get_ufid_adjust_stats(ufid, &adjust_stats)) {
+                netdev_tc_adjust_stats(stats, &adjust_stats);
+            }
+        }
+    }
+
      err = tc_del_filter(id);
      if (!err) {
          del_ufid_tc_mapping(ufid);
@@ -249,7 +283,7 @@ del_filter_and_ufid_mapping(struct tcf_id *id, const 
ovs_u128 *ufid)
  /* Add ufid entry to ufid_to_tc hashmap. */
  static void
  add_ufid_tc_mapping(struct netdev *netdev, const ovs_u128 *ufid,
-                    struct tcf_id *id)
+                    struct tcf_id *id, struct dpif_flow_stats *stats)
  {
      struct ufid_tc_data *new_data = xzalloc(sizeof *new_data);
      size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
@@ -261,6 +295,9 @@ add_ufid_tc_mapping(struct netdev *netdev, const ovs_u128 
*ufid,
      new_data->ufid = *ufid;
      new_data->id = *id;
      new_data->netdev = netdev_ref(netdev);
+    if (stats) {
+        new_data->adjust_stats = *stats;
+    }
lgtm
ovs_mutex_lock(&ufid_lock);
      hmap_insert(&ufid_to_tc, &new_data->ufid_to_tc_node, ufid_hash);
@@ -292,6 +329,25 @@ get_ufid_tc_mapping(const ovs_u128 *ufid, struct tcf_id 
*id)
      return ENOENT;
  }

/* Get adjust_stats from ufid_to_tc hashmap.
 *
 * Returns 0 if successful and fills stats with adjust_stats.
 * Otherwise returns the error.
 */

+static int
+get_ufid_adjust_stats(const ovs_u128 *ufid, struct dpif_flow_stats *stats)
+{
+    size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
+    struct ufid_tc_data *data;
+
+    ovs_mutex_lock(&ufid_lock);
+    HMAP_FOR_EACH_WITH_HASH (data, ufid_to_tc_node, ufid_hash, &ufid_to_tc) {
This question isn't about your patch so feel free to ignore it. It's unclear to me how data gets assigned. I get that the macro looks through the nodes of ufid_to_tc with matching ufid_hash. What I dont get is how it converts the hmap_node to ufid_tc_data

Other than that this section looks identical to get_ufid_tc_mapping, so it lgtm. But if we add more get_* functions we should probaby write a function to return the matching ufid_tc_data structure instead
+        if (ovs_u128_equals(*ufid, data->ufid)) {
+            *stats = data->adjust_stats;
+            ovs_mutex_unlock(&ufid_lock);
+            return 0;
+        }
+    }
+    ovs_mutex_unlock(&ufid_lock);
+
+    return ENOENT;
+}
+
  /* Find ufid entry in ufid_to_tc hashmap using tcf_id id.
   * The result is saved in ufid.
   *
@@ -1193,6 +1249,7 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump,
                          get_tc_qdisc_hook(netdev));
while (nl_dump_next(dump->nl_dump, &nl_flow, rbuffer)) {
+        struct dpif_flow_stats adjust_stats;
          struct tc_flower flower;
if (parse_netlink_to_tc_flower(&nl_flow, &id, &flower, dump->terse)) {
@@ -1210,6 +1267,10 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump,
              continue;
          }
+ if (!get_ufid_adjust_stats(ufid, &adjust_stats)) {
+            netdev_tc_adjust_stats(stats, &adjust_stats);
+        }
+
          match->wc.masks.in_port.odp_port = u32_to_odp(UINT32_MAX);
          match->flow.in_port.odp_port = dump->port;
          match_set_recirc_id(match, id.chain);
@@ -2058,6 +2119,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match 
*match,
      struct flow *mask = &match->wc.masks;
      const struct flow_tnl *tnl = &match->flow.tunnel;
      struct flow_tnl *tnl_mask = &mask->tunnel;
+    struct dpif_flow_stats adjust_stats;
      bool recirc_act = false;
      uint32_t block_id = 0;
      struct tcf_id id;
@@ -2351,10 +2413,12 @@ netdev_tc_flow_put(struct netdev *netdev, struct match 
*match,
          return EOPNOTSUPP;
      }
+ memset(&adjust_stats, 0, sizeof adjust_stats);
      if (get_ufid_tc_mapping(ufid, &id) == 0) {
          VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d",
                      id.handle, id.prio);
-        info->tc_modify_flow_deleted = !del_filter_and_ufid_mapping(&id, ufid);
+        info->tc_modify_flow_deleted = !del_filter_and_ufid_mapping(
+            &id, ufid, &adjust_stats);
It would had been nice to get rid of del_filter_and_ufid_mapping and call del_filter_and_ufid_mapping instead since it is also calling get_ufid_tc_mapping but this code snippet is doing something slightly different
      }
prio = get_prio_for_tc_flower(&flower);
@@ -2372,8 +2436,9 @@ netdev_tc_flow_put(struct netdev *netdev, struct match 
*match,
      if (!err) {
          if (stats) {
              memset(stats, 0, sizeof *stats);
+            netdev_tc_adjust_stats(stats, &adjust_stats);
Why do we bother to do this? is the caller going to be looking at stats?
          }
-        add_ufid_tc_mapping(netdev, ufid, &id);
+        add_ufid_tc_mapping(netdev, ufid, &id, &adjust_stats);
      }
return err;
@@ -2414,6 +2479,13 @@ netdev_tc_flow_get(struct netdev *netdev,
      parse_tc_flower_to_match(netdev, &flower, match, actions,
                               stats, attrs, buf, false);
+ if (stats) {
+        struct dpif_flow_stats adjust_stats;
+
+        if (!get_ufid_adjust_stats(ufid, &adjust_stats)) {
+            netdev_tc_adjust_stats(stats, &adjust_stats);
+        }
+    }
lgtm
      match->wc.masks.in_port.odp_port = u32_to_odp(UINT32_MAX);
      match->flow.in_port.odp_port = in_port;
      match_set_recirc_id(match, id.chain);
@@ -2426,7 +2498,6 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
                     const ovs_u128 *ufid,
                     struct dpif_flow_stats *stats)
  {
-    struct tc_flower flower;
      struct tcf_id id;
      int error;
@@ -2435,16 +2506,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
          return error;
      }
- if (stats) {
-        memset(stats, 0, sizeof *stats);
-        if (!tc_get_flower(&id, &flower)) {
-            parse_tc_flower_to_stats(&flower, stats);
-        }
-    }
-
-    error = del_filter_and_ufid_mapping(&id, ufid);
-
-    return error;
+    return del_filter_and_ufid_mapping(&id, ufid, stats);
  }
static int
diff --git a/lib/tc.h b/lib/tc.h
index a828fd3e3..404726f14 100644
--- a/lib/tc.h
+++ b/lib/tc.h
@@ -343,7 +343,6 @@ static inline bool
  is_tcf_id_eq(struct tcf_id *id1, struct tcf_id *id2)
  {
      return id1->prio == id2->prio
-           && id1->handle == id2->handle
             && id1->handle == id2->handle
Good catch! How did this ever get in?
             && id1->hook == id2->hook
             && id1->block_id == id2->block_id
diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at
index 1a6057080..6532e439e 100644
--- a/tests/system-offloads-traffic.at
+++ b/tests/system-offloads-traffic.at
@@ -395,7 +395,7 @@ AT_CHECK([cat p4.pcap | awk 'NF{print $NF}' | uniq -c | awk 
'{$1=$1;print}'], [0
  # This test verifies the total packet counters work when individual branches
  # are taken.
-AT_CHECK([ovs-appctl revalidator/wait], [0])
+AT_CHECK([ovs-appctl revalidator/purge], [0])
  AT_CHECK([ovs-ofctl del-flows br0])
  AT_DATA([flows.txt], [dnl
  table=0,in_port=2 actions=output:1
@@ -416,8 +416,8 @@ NS_CHECK_EXEC([at_ns1], [ping -q -c 10 -i 0.1 -w 2 -s 1024 
10.1.1.2 | FORMAT_PIN
  ], [], [ovs-appctl dpctl/dump-flows; ovs-ofctl dump-flows br0])
AT_CHECK([ovs-appctl dpctl/dump-flows | grep "eth_type(0x0800)" | DUMP_CLEAN_SORTED | sed 's/bytes:11440/bytes:11720/'], [0], [dnl
-in_port(2),eth(),eth_type(0x0800),ipv4(frag=no), packets:20, bytes:11720, 
used:0.001s, actions:check_pkt_len(size=200,gt(3),le(3))
-in_port(3),eth(),eth_type(0x0800),ipv4(frag=no), packets:20, bytes:11720, 
used:0.001s, actions:output
+in_port(2),eth(),eth_type(0x0800),ipv4(frag=no), packets:19, bytes:11348, 
used:0.001s, actions:check_pkt_len(size=200,gt(3),le(3))
+in_port(3),eth(),eth_type(0x0800),ipv4(frag=no), packets:19, bytes:11348, 
used:0.001s, actions:output
  ])
@@ -490,7 +490,7 @@ NS_CHECK_EXEC([at_ns1], [ping -q -c 10 -i 0.1 -w 2 -s 1024 10.1.1.2 | FORMAT_PIN
  OVS_CHECK_ACTIONS([check_pkt_len(size=200,gt(3,5),le(3,4))])
-AT_CHECK([ovs-appctl revalidator/wait], [0])
+AT_CHECK([ovs-appctl revalidator/purge], [0])
  AT_CHECK([ovs-ofctl del-flows br0])
  AT_DATA([flows.txt], [dnl
  table=0,in_port=2 actions=output:1
@@ -517,7 +517,7 @@ NS_CHECK_EXEC([at_ns1], [ping -q -c 10 -i 0.1 -w 2 -s 1024 
10.1.1.2 | FORMAT_PIN
AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep "eth_type(0x0800)" | DUMP_CLEAN_SORTED | sed -e 's/bytes:11348/bytes:11614/' -e 's/bytes:11440/bytes:11720/'], [0], [dnl
  in_port(2),eth(),eth_type(0x0800),ipv4(proto=1,tos=0/0xfc,frag=no), 
packets:19, bytes:11614, used:0.001s, 
actions:check_pkt_len(size=200,gt(set(ipv4(tos=0x4/0xfc)),4),le(set(ipv4(tos=0x8/0xfc)),5)),3
-in_port(3),eth(),eth_type(0x0800),ipv4(frag=no), packets:20, bytes:11720, 
used:0.001s, actions:output
+in_port(3),eth(),eth_type(0x0800),ipv4(frag=no), packets:19, bytes:11614, 
used:0.001s, actions:output
  ])
sleep 1
@@ -678,3 +678,58 @@ 
OVS_CHECK_ACTIONS([check_pkt_len(size=200,gt(5),le(check_pkt_len(size=100,gt(5),
OVS_TRAFFIC_VSWITCHD_STOP
  AT_CLEANUP
+
+
+AT_SETUP([offloads - simulated flow update])
+OVS_TRAFFIC_VSWITCHD_START([], [], [-- set Open_vSwitch . 
other_config:hw-offload=true])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+
+AT_DATA([flows.txt], [dnl
+add in_port=ovs-p0,actions=ovs-p1,br0
+add in_port=ovs-p1,actions=ovs-p0,br0
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2 10.1.1.2 | FORMAT_PING], 
[0], [dnl
+10 packets transmitted, 10 received, 0% packet loss, time 0ms
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep "eth_type(0x0800)" | sort | 
strip_recirc | strip_used], [0], [dnl
+recirc_id(<recirc>),in_port(2),eth(),eth_type(0x0800),ipv4(frag=no), 
packets:9, bytes:756, used:0.0s, actions:3,1
+recirc_id(<recirc>),in_port(3),eth(),eth_type(0x0800),ipv4(frag=no), 
packets:9, bytes:756, used:0.0s, actions:2,1
+])
+
+AT_DATA([flows2.txt], [dnl
+modify in_port=ovs-p0,actions=ovs-p1
+modify in_port=ovs-p1,actions=ovs-p0
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows2.txt])
+AT_CHECK([ovs-appctl revalidator/wait], [0])
+
+NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2 10.1.1.2 | FORMAT_PING], 
[0], [dnl
+10 packets transmitted, 10 received, 0% packet loss, time 0ms
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep "eth_type(0x0800)" | sort | 
strip_recirc | strip_used], [0], [dnl
+recirc_id(<recirc>),in_port(2),eth(),eth_type(0x0800),ipv4(frag=no), 
packets:19, bytes:1596, used:0.0s, actions:3
+recirc_id(<recirc>),in_port(3),eth(),eth_type(0x0800),ipv4(frag=no), 
packets:19, bytes:1596, used:0.0s, actions:2
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+AT_CHECK([ovs-appctl revalidator/wait], [0])
+
+NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2 10.1.1.2 | FORMAT_PING], 
[0], [dnl
+10 packets transmitted, 10 received, 0% packet loss, time 0ms
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep "eth_type(0x0800)" | sort | 
strip_recirc | strip_used], [0], [dnl
+recirc_id(<recirc>),in_port(2),eth(),eth_type(0x0800),ipv4(frag=no), 
packets:29, bytes:2436, used:0.0s, actions:3,1
+recirc_id(<recirc>),in_port(3),eth(),eth_type(0x0800),ipv4(frag=no), 
packets:29, bytes:2436, used:0.0s, actions:2,1
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index e5403519f..436f0504a 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -7440,3 +7440,67 @@ OVS_WAIT_UNTIL([cat p2.pcap | grep -E "0x0050: *0000 
*0000 *5002 *2000 *b85e *00
OVS_TRAFFIC_VSWITCHD_STOP
  AT_CLEANUP
+
+
+AT_SETUP([offloads - simulated flow update])
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+
+AT_DATA([flows.txt], [dnl
+add in_port=ovs-p0,actions=ovs-p1,br0
+add in_port=ovs-p1,actions=ovs-p0,br0
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2 10.1.1.2 | FORMAT_PING], 
[0], [dnl
+10 packets transmitted, 10 received, 0% packet loss, time 0ms
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep "eth_type(0x0800)" | sort | dnl
+          strip_recirc | strip_used | dnl
+          sed 's/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/;s/,eth(),/,/'], dnl
+          [0], [dnl
+recirc_id(<recirc>),in_port(2),eth_type(0x0800),ipv4(frag=no), packets:9, 
bytes:882, used:0.0s, actions:3,1
+recirc_id(<recirc>),in_port(3),eth_type(0x0800),ipv4(frag=no), packets:9, 
bytes:882, used:0.0s, actions:2,1
+])
+
+AT_DATA([flows2.txt], [dnl
+modify in_port=ovs-p0,actions=ovs-p1
+modify in_port=ovs-p1,actions=ovs-p0
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows2.txt])
+AT_CHECK([ovs-appctl revalidator/wait], [0])
+
+NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2 10.1.1.2 | FORMAT_PING], 
[0], [dnl
+10 packets transmitted, 10 received, 0% packet loss, time 0ms
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep "eth_type(0x0800)" | sort | dnl
+          strip_recirc | strip_used | dnl
+          sed -e 's/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/;s/,eth(),/,/'], 
dnl
+          [0], [dnl
+recirc_id(<recirc>),in_port(2),eth_type(0x0800),ipv4(frag=no), packets:19, 
bytes:1862, used:0.0s, actions:3
+recirc_id(<recirc>),in_port(3),eth_type(0x0800),ipv4(frag=no), packets:19, 
bytes:1862, used:0.0s, actions:2
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+AT_CHECK([ovs-appctl revalidator/wait], [0])
+
+NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2 10.1.1.2 | FORMAT_PING], 
[0], [dnl
+10 packets transmitted, 10 received, 0% packet loss, time 0ms
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep "eth_type(0x0800)" | sort | dnl
+          strip_recirc | strip_used | dnl
+          sed 's/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/;s/,eth(),/,/'], dnl
+          [0], [dnl
+recirc_id(<recirc>),in_port(2),eth_type(0x0800),ipv4(frag=no), packets:29, 
bytes:2842, used:0.0s, actions:3,1
+recirc_id(<recirc>),in_port(3),eth_type(0x0800),ipv4(frag=no), packets:29, 
bytes:2842, used:0.0s, actions:2,1
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to