On 31 Jan 2023, at 14:13, Ilya Maximets wrote:
> On 1/13/23 13:57, Eelco Chaudron wrote: >> When a flow gets modified, i.e. the actions are changes, the tc layer will >> remove, and re-add the flow. This is causing all the counters to be reset. >> >> This patch will remember the previous tc counters and adjust any requests >> for statistics. This is done in a similar way as the rte_flow implementation. >> >> It also updates the check_pkt_len tc test to purge the flows, so we do >> not use updated tc flows, with their counters. > > Could you clarify what exactly you mean here? I hope this is more clear: It also updates the check_pkt_len tc test to purge the flows, so we do not use existing updated tc flow counters, but start with fresh installed set of datapath flows. >> >> Signed-off-by: Eelco Chaudron <echau...@redhat.com> >> >> --- >> -v2: Do not update the stats->used, as in terse dump they should be 0. >> -v3: Added some comments based on the v2 review. >> >> Please note that for now two copies of the test case exists, but I will clean >> this up once this gets applied by submitting a new revision of the >> 'tests: Add system-traffic.at tests to check-offloads' series. >> >> lib/netdev-offload-tc.c | 98 >> ++++++++++++++++++++++++++++++++------ >> lib/tc.h | 1 >> tests/system-offloads-traffic.at | 65 +++++++++++++++++++++++-- >> tests/system-traffic.at | 64 +++++++++++++++++++++++++ >> 4 files changed, 207 insertions(+), 21 deletions(-) >> >> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c >> index ce7f8ad97..59c113187 100644 >> --- a/lib/netdev-offload-tc.c >> +++ b/lib/netdev-offload-tc.c >> @@ -97,6 +97,12 @@ static int netdev_tc_parse_nl_actions(struct netdev >> *netdev, >> bool *recirc_act, bool more_actions, >> struct tc_action **need_jump_update); >> >> +static void parse_tc_flower_to_stats(struct tc_flower *flower, >> + struct dpif_flow_stats *stats); >> + >> +static int get_ufid_adjust_stats(const ovs_u128 *ufid, >> + struct dpif_flow_stats *stats); > > No need to specify parameter names in prototypes. I know, but all forward declarations in this c file have them included, so I decided to do the same. So I kept them for now, but let me know if you think they should be removed. >> + >> static bool >> is_internal_port(const char *type) >> { >> @@ -193,6 +199,9 @@ static struct ovs_mutex ufid_lock = >> OVS_MUTEX_INITIALIZER; >> * @ufid: ufid assigned to the flow >> * @id: tc filter id (tcf_id) >> * @netdev: netdev associated with the tc rule >> + * @adjust_stats: When flow gets updated with new actions, we need to adjust >> + * the reported stats to include previous values as the >> hardware >> + * rule is removed and re-added. This stats copy is used for >> it. >> */ >> struct ufid_tc_data { >> struct hmap_node ufid_to_tc_node; >> @@ -200,6 +209,7 @@ struct ufid_tc_data { >> ovs_u128 ufid; >> struct tcf_id id; >> struct netdev *netdev; >> + struct dpif_flow_stats adjust_stats; >> }; >> >> static void >> @@ -233,12 +243,37 @@ del_ufid_tc_mapping(const ovs_u128 *ufid) >> ovs_mutex_unlock(&ufid_lock); >> } >> >> +static void >> +netdev_tc_adjust_stats(struct dpif_flow_stats *stats, >> + struct dpif_flow_stats *adjust_stats) > > The adjust_stats pointer could be const. Will do in next rev. >> +{ >> + /* Do not try to restore the stats->used, as in terse >> + * mode dumps we will always report them as 0. > > I'm not sure I understand that part. Why we will report them > as zero? If we do a terse dump, we will not process the TLV holding this value, and we will return 0. The revalidator code will override/update this value in udpif_update_used(). > Not restoring the 'used' might not be a problem for a general > case, because freshly gathered stats will have more up to date > 'used' value, but that might be a problem for the 'never' case > where updated fow was not used after modification but it was > used before. For the revalidator use case it should always return 0, so it will take care of it with udpif_update_used(). If we return the last used value retriefd during the tc_get_flower() in del_filter_and_ufid_mapping(). It causes the revalidator process to fail, as it will always report the wrong/old value (as it would be larger than 0, which was what I had before). >> + * tcp_flags is not used by tc, so no need to update it. */ >> + stats->n_bytes += adjust_stats->n_bytes; >> + stats->n_packets += adjust_stats->n_packets; >> +} >> + >> /* Wrapper function to delete filter and ufid tc mapping */ >> static int >> -del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid) >> +del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid, >> + struct dpif_flow_stats *stats) >> { >> + struct tc_flower flower; >> int err; >> >> + if (stats) { >> + memset(stats, 0, sizeof *stats); >> + if (!tc_get_flower(id, &flower)) { >> + struct dpif_flow_stats adjust_stats; >> + >> + parse_tc_flower_to_stats(&flower, stats); >> + if (!get_ufid_adjust_stats(ufid, &adjust_stats)) { >> + netdev_tc_adjust_stats(stats, &adjust_stats); >> + } >> + } >> + } >> + >> err = tc_del_filter(id); >> if (!err) { >> del_ufid_tc_mapping(ufid); >> @@ -249,7 +284,7 @@ del_filter_and_ufid_mapping(struct tcf_id *id, const >> ovs_u128 *ufid) >> /* Add ufid entry to ufid_to_tc hashmap. */ >> static void >> add_ufid_tc_mapping(struct netdev *netdev, const ovs_u128 *ufid, >> - struct tcf_id *id) >> + struct tcf_id *id, struct dpif_flow_stats *stats) >> { >> struct ufid_tc_data *new_data = xzalloc(sizeof *new_data); >> size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0); >> @@ -261,6 +296,9 @@ add_ufid_tc_mapping(struct netdev *netdev, const >> ovs_u128 *ufid, >> new_data->ufid = *ufid; >> new_data->id = *id; >> new_data->netdev = netdev_ref(netdev); >> + if (stats) { >> + new_data->adjust_stats = *stats; >> + } >> >> ovs_mutex_lock(&ufid_lock); >> hmap_insert(&ufid_to_tc, &new_data->ufid_to_tc_node, ufid_hash); >> @@ -292,6 +330,30 @@ get_ufid_tc_mapping(const ovs_u128 *ufid, struct tcf_id >> *id) >> return ENOENT; >> } >> >> +/* Get adjust_stats from ufid_to_tc hashmap. >> + * >> + * Returns 0 if successful and fills stats with adjust_stats. >> + * Otherwise returns the error. >> +*/ >> +static int >> +get_ufid_adjust_stats(const ovs_u128 *ufid, struct dpif_flow_stats *stats) >> +{ >> + size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0); >> + struct ufid_tc_data *data; >> + >> + ovs_mutex_lock(&ufid_lock); >> + HMAP_FOR_EACH_WITH_HASH (data, ufid_to_tc_node, ufid_hash, &ufid_to_tc) >> { >> + if (ovs_u128_equals(*ufid, data->ufid)) { >> + *stats = data->adjust_stats; >> + ovs_mutex_unlock(&ufid_lock); >> + return 0; >> + } >> + } >> + ovs_mutex_unlock(&ufid_lock); >> + >> + return ENOENT; >> +} >> + >> /* Find ufid entry in ufid_to_tc hashmap using tcf_id id. >> * The result is saved in ufid. >> * >> @@ -1193,6 +1255,7 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump, >> get_tc_qdisc_hook(netdev)); >> >> while (nl_dump_next(dump->nl_dump, &nl_flow, rbuffer)) { >> + struct dpif_flow_stats adjust_stats; >> struct tc_flower flower; >> >> if (parse_netlink_to_tc_flower(&nl_flow, &id, &flower, >> dump->terse)) { >> @@ -1210,6 +1273,10 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump >> *dump, >> continue; >> } >> >> + if (!get_ufid_adjust_stats(ufid, &adjust_stats)) { >> + netdev_tc_adjust_stats(stats, &adjust_stats); >> + } >> + >> match->wc.masks.in_port.odp_port = u32_to_odp(UINT32_MAX); >> match->flow.in_port.odp_port = dump->port; >> match_set_recirc_id(match, id.chain); >> @@ -2058,6 +2125,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match >> *match, >> struct flow *mask = &match->wc.masks; >> const struct flow_tnl *tnl = &match->flow.tunnel; >> struct flow_tnl *tnl_mask = &mask->tunnel; >> + struct dpif_flow_stats adjust_stats; >> bool recirc_act = false; >> uint32_t block_id = 0; >> struct tcf_id id; >> @@ -2351,10 +2419,12 @@ netdev_tc_flow_put(struct netdev *netdev, struct >> match *match, >> return EOPNOTSUPP; >> } >> >> + memset(&adjust_stats, 0, sizeof adjust_stats); >> if (get_ufid_tc_mapping(ufid, &id) == 0) { >> VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d", >> id.handle, id.prio); >> - info->tc_modify_flow_deleted = !del_filter_and_ufid_mapping(&id, >> ufid); >> + info->tc_modify_flow_deleted = !del_filter_and_ufid_mapping( >> + &id, ufid, &adjust_stats); >> } >> >> prio = get_prio_for_tc_flower(&flower); >> @@ -2372,8 +2442,9 @@ netdev_tc_flow_put(struct netdev *netdev, struct match >> *match, >> if (!err) { >> if (stats) { >> memset(stats, 0, sizeof *stats); >> + netdev_tc_adjust_stats(stats, &adjust_stats); >> } >> - add_ufid_tc_mapping(netdev, ufid, &id); >> + add_ufid_tc_mapping(netdev, ufid, &id, &adjust_stats); >> } >> >> return err; >> @@ -2414,6 +2485,13 @@ netdev_tc_flow_get(struct netdev *netdev, >> parse_tc_flower_to_match(netdev, &flower, match, actions, >> stats, attrs, buf, false); >> >> + if (stats) { >> + struct dpif_flow_stats adjust_stats; >> + >> + if (!get_ufid_adjust_stats(ufid, &adjust_stats)) { >> + netdev_tc_adjust_stats(stats, &adjust_stats); >> + } >> + } >> match->wc.masks.in_port.odp_port = u32_to_odp(UINT32_MAX); >> match->flow.in_port.odp_port = in_port; >> match_set_recirc_id(match, id.chain); >> @@ -2426,7 +2504,6 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, >> const ovs_u128 *ufid, >> struct dpif_flow_stats *stats) >> { >> - struct tc_flower flower; >> struct tcf_id id; >> int error; >> >> @@ -2435,16 +2512,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, >> return error; >> } >> >> - if (stats) { >> - memset(stats, 0, sizeof *stats); >> - if (!tc_get_flower(&id, &flower)) { >> - parse_tc_flower_to_stats(&flower, stats); >> - } >> - } >> - >> - error = del_filter_and_ufid_mapping(&id, ufid); >> - >> - return error; >> + return del_filter_and_ufid_mapping(&id, ufid, stats); >> } >> >> static int >> diff --git a/lib/tc.h b/lib/tc.h >> index a828fd3e3..404726f14 100644 >> --- a/lib/tc.h >> +++ b/lib/tc.h >> @@ -343,7 +343,6 @@ static inline bool >> is_tcf_id_eq(struct tcf_id *id1, struct tcf_id *id2) >> { >> return id1->prio == id2->prio >> - && id1->handle == id2->handle >> && id1->handle == id2->handle >> && id1->hook == id2->hook >> && id1->block_id == id2->block_id >> diff --git a/tests/system-offloads-traffic.at >> b/tests/system-offloads-traffic.at >> index 1a6057080..6532e439e 100644 >> --- a/tests/system-offloads-traffic.at >> +++ b/tests/system-offloads-traffic.at >> @@ -395,7 +395,7 @@ AT_CHECK([cat p4.pcap | awk 'NF{print $NF}' | uniq -c | >> awk '{$1=$1;print}'], [0 >> # This test verifies the total packet counters work when individual branches >> # are taken. >> >> -AT_CHECK([ovs-appctl revalidator/wait], [0]) >> +AT_CHECK([ovs-appctl revalidator/purge], [0]) >> AT_CHECK([ovs-ofctl del-flows br0]) >> AT_DATA([flows.txt], [dnl >> table=0,in_port=2 actions=output:1 >> @@ -416,8 +416,8 @@ NS_CHECK_EXEC([at_ns1], [ping -q -c 10 -i 0.1 -w 2 -s >> 1024 10.1.1.2 | FORMAT_PIN >> ], [], [ovs-appctl dpctl/dump-flows; ovs-ofctl dump-flows br0]) >> >> AT_CHECK([ovs-appctl dpctl/dump-flows | grep "eth_type(0x0800)" | >> DUMP_CLEAN_SORTED | sed 's/bytes:11440/bytes:11720/'], [0], [dnl > > This check has a bytes counter adjustment for older kernels. > The numbers are liely incorrect now. > Also, is this change adjustment necessary? You seem to not follow > this pattern in the newly introduced tests. Yes, I messed this up, I should have changed this line, instead of the one below :( I kept this pattern in because if Nvidia/Kernel folks ever fix the counters ;) I will also add this to the new test (I was planning on doing this when the tests patch gets in, but might as well do it now ;) >> -in_port(2),eth(),eth_type(0x0800),ipv4(frag=no), packets:20, bytes:11720, >> used:0.001s, actions:check_pkt_len(size=200,gt(3),le(3)) >> -in_port(3),eth(),eth_type(0x0800),ipv4(frag=no), packets:20, bytes:11720, >> used:0.001s, actions:output >> +in_port(2),eth(),eth_type(0x0800),ipv4(frag=no), packets:19, bytes:11348, >> used:0.001s, actions:check_pkt_len(size=200,gt(3),le(3)) >> +in_port(3),eth(),eth_type(0x0800),ipv4(frag=no), packets:19, bytes:11348, >> used:0.001s, actions:output >> ]) >> >> >> @@ -490,7 +490,7 @@ NS_CHECK_EXEC([at_ns1], [ping -q -c 10 -i 0.1 -w 2 -s >> 1024 10.1.1.2 | FORMAT_PIN >> OVS_CHECK_ACTIONS([check_pkt_len(size=200,gt(3,5),le(3,4))]) >> >> >> -AT_CHECK([ovs-appctl revalidator/wait], [0]) >> +AT_CHECK([ovs-appctl revalidator/purge], [0]) >> AT_CHECK([ovs-ofctl del-flows br0]) >> AT_DATA([flows.txt], [dnl >> table=0,in_port=2 actions=output:1 >> @@ -517,7 +517,7 @@ NS_CHECK_EXEC([at_ns1], [ping -q -c 10 -i 0.1 -w 2 -s >> 1024 10.1.1.2 | FORMAT_PIN >> >> AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep >> "eth_type(0x0800)" | DUMP_CLEAN_SORTED | sed -e 's/bytes:11348/bytes:11614/' >> -e 's/bytes:11440/bytes:11720/'], [0], [dnl > > Same here. See above. Let me know if you need to discuss more, if not I’ll try to send a v4 tomorrow. //Eelco _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev