On 1/31/23 15:38, Eelco Chaudron wrote:
>
>
> On 31 Jan 2023, at 14:13, Ilya Maximets wrote:
>
>> On 1/13/23 13:57, Eelco Chaudron wrote:
>>> When a flow gets modified, i.e. the actions are changes, the tc layer will
>>> remove, and re-add the flow. This is causing all the counters to be reset.
>>>
>>> This patch will remember the previous tc counters and adjust any requests
>>> for statistics. This is done in a similar way as the rte_flow
>>> implementation.
>>>
>>> It also updates the check_pkt_len tc test to purge the flows, so we do
>>> not use updated tc flows, with their counters.
>>
>> Could you clarify what exactly you mean here?
>
> I hope this is more clear:
>
> It also updates the check_pkt_len tc test to purge the flows, so we do
> not use existing updated tc flow counters, but start with fresh installed
> set of datapath flows.
>
>>>
>>> Signed-off-by: Eelco Chaudron <[email protected]>
>>>
>>> ---
>>> -v2: Do not update the stats->used, as in terse dump they should be 0.
>>> -v3: Added some comments based on the v2 review.
>>>
>>> Please note that for now two copies of the test case exists, but I will
>>> clean
>>> this up once this gets applied by submitting a new revision of the
>>> 'tests: Add system-traffic.at tests to check-offloads' series.
>>>
>>> lib/netdev-offload-tc.c | 98
>>> ++++++++++++++++++++++++++++++++------
>>> lib/tc.h | 1
>>> tests/system-offloads-traffic.at | 65 +++++++++++++++++++++++--
>>> tests/system-traffic.at | 64 +++++++++++++++++++++++++
>>> 4 files changed, 207 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
>>> index ce7f8ad97..59c113187 100644
>>> --- a/lib/netdev-offload-tc.c
>>> +++ b/lib/netdev-offload-tc.c
>>> @@ -97,6 +97,12 @@ static int netdev_tc_parse_nl_actions(struct netdev
>>> *netdev,
>>> bool *recirc_act, bool more_actions,
>>> struct tc_action **need_jump_update);
>>>
>>> +static void parse_tc_flower_to_stats(struct tc_flower *flower,
>>> + struct dpif_flow_stats *stats);
>>> +
>>> +static int get_ufid_adjust_stats(const ovs_u128 *ufid,
>>> + struct dpif_flow_stats *stats);
>>
>> No need to specify parameter names in prototypes.
>
> I know, but all forward declarations in this c file have them included, so I
> decided to do the same.
> So I kept them for now, but let me know if you think they should be removed.
>
>>> +
>>> static bool
>>> is_internal_port(const char *type)
>>> {
>>> @@ -193,6 +199,9 @@ static struct ovs_mutex ufid_lock =
>>> OVS_MUTEX_INITIALIZER;
>>> * @ufid: ufid assigned to the flow
>>> * @id: tc filter id (tcf_id)
>>> * @netdev: netdev associated with the tc rule
>>> + * @adjust_stats: When flow gets updated with new actions, we need to
>>> adjust
>>> + * the reported stats to include previous values as the
>>> hardware
>>> + * rule is removed and re-added. This stats copy is used
>>> for it.
>>> */
>>> struct ufid_tc_data {
>>> struct hmap_node ufid_to_tc_node;
>>> @@ -200,6 +209,7 @@ struct ufid_tc_data {
>>> ovs_u128 ufid;
>>> struct tcf_id id;
>>> struct netdev *netdev;
>>> + struct dpif_flow_stats adjust_stats;
>>> };
>>>
>>> static void
>>> @@ -233,12 +243,37 @@ del_ufid_tc_mapping(const ovs_u128 *ufid)
>>> ovs_mutex_unlock(&ufid_lock);
>>> }
>>>
>>> +static void
>>> +netdev_tc_adjust_stats(struct dpif_flow_stats *stats,
>>> + struct dpif_flow_stats *adjust_stats)
>>
>> The adjust_stats pointer could be const.
>
> Will do in next rev.
>
>>> +{
>>> + /* Do not try to restore the stats->used, as in terse
>>> + * mode dumps we will always report them as 0.
>>
>> I'm not sure I understand that part. Why we will report them
>> as zero?
>
> If we do a terse dump, we will not process the TLV holding this value, and we
> will return 0.
> The revalidator code will override/update this value in udpif_update_used().
>
>
>> Not restoring the 'used' might not be a problem for a general
>> case, because freshly gathered stats will have more up to date
>> 'used' value, but that might be a problem for the 'never' case
>> where updated fow was not used after modification but it was
>> used before.
>
> For the revalidator use case it should always return 0, so it will take care
> of it with udpif_update_used(). If we return the last used value retriefd
> during the tc_get_flower() in del_filter_and_ufid_mapping(). It causes the
> revalidator process to fail, as it will always report the wrong/old value (as
> it would be larger than 0, which was what I had before).
Hrm, I see.
Maybe re-word the comment to clarify that TC doesn't report
TCA_ACT_OPTIONS in terse dumps, so the 'lastused' value is
not available?
>
>>> + * tcp_flags is not used by tc, so no need to update it. */
s/not used/not collected/ ?
>>> + stats->n_bytes += adjust_stats->n_bytes;
>>> + stats->n_packets += adjust_stats->n_packets;
>>> +}
>>> +
>>> /* Wrapper function to delete filter and ufid tc mapping */
>>> static int
>>> -del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid)
>>> +del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid,
>>> + struct dpif_flow_stats *stats)
>>> {
>>> + struct tc_flower flower;
>>> int err;
>>>
>>> + if (stats) {
>>> + memset(stats, 0, sizeof *stats);
>>> + if (!tc_get_flower(id, &flower)) {
>>> + struct dpif_flow_stats adjust_stats;
>>> +
>>> + parse_tc_flower_to_stats(&flower, stats);
>>> + if (!get_ufid_adjust_stats(ufid, &adjust_stats)) {
>>> + netdev_tc_adjust_stats(stats, &adjust_stats);
>>> + }
>>> + }
>>> + }
>>> +
>>> err = tc_del_filter(id);
>>> if (!err) {
>>> del_ufid_tc_mapping(ufid);
>>> @@ -249,7 +284,7 @@ del_filter_and_ufid_mapping(struct tcf_id *id, const
>>> ovs_u128 *ufid)
>>> /* Add ufid entry to ufid_to_tc hashmap. */
>>> static void
>>> add_ufid_tc_mapping(struct netdev *netdev, const ovs_u128 *ufid,
>>> - struct tcf_id *id)
>>> + struct tcf_id *id, struct dpif_flow_stats *stats)
>>> {
>>> struct ufid_tc_data *new_data = xzalloc(sizeof *new_data);
>>> size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
>>> @@ -261,6 +296,9 @@ add_ufid_tc_mapping(struct netdev *netdev, const
>>> ovs_u128 *ufid,
>>> new_data->ufid = *ufid;
>>> new_data->id = *id;
>>> new_data->netdev = netdev_ref(netdev);
>>> + if (stats) {
>>> + new_data->adjust_stats = *stats;
>>> + }
>>>
>>> ovs_mutex_lock(&ufid_lock);
>>> hmap_insert(&ufid_to_tc, &new_data->ufid_to_tc_node, ufid_hash);
>>> @@ -292,6 +330,30 @@ get_ufid_tc_mapping(const ovs_u128 *ufid, struct
>>> tcf_id *id)
>>> return ENOENT;
>>> }
>>>
>>> +/* Get adjust_stats from ufid_to_tc hashmap.
>>> + *
>>> + * Returns 0 if successful and fills stats with adjust_stats.
>>> + * Otherwise returns the error.
>>> +*/
>>> +static int
>>> +get_ufid_adjust_stats(const ovs_u128 *ufid, struct dpif_flow_stats *stats)
>>> +{
>>> + size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
>>> + struct ufid_tc_data *data;
>>> +
>>> + ovs_mutex_lock(&ufid_lock);
>>> + HMAP_FOR_EACH_WITH_HASH (data, ufid_to_tc_node, ufid_hash,
>>> &ufid_to_tc) {
>>> + if (ovs_u128_equals(*ufid, data->ufid)) {
>>> + *stats = data->adjust_stats;
>>> + ovs_mutex_unlock(&ufid_lock);
>>> + return 0;
>>> + }
>>> + }
>>> + ovs_mutex_unlock(&ufid_lock);
>>> +
>>> + return ENOENT;
>>> +}
>>> +
>>> /* Find ufid entry in ufid_to_tc hashmap using tcf_id id.
>>> * The result is saved in ufid.
>>> *
>>> @@ -1193,6 +1255,7 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump
>>> *dump,
>>> get_tc_qdisc_hook(netdev));
>>>
>>> while (nl_dump_next(dump->nl_dump, &nl_flow, rbuffer)) {
>>> + struct dpif_flow_stats adjust_stats;
>>> struct tc_flower flower;
>>>
>>> if (parse_netlink_to_tc_flower(&nl_flow, &id, &flower,
>>> dump->terse)) {
>>> @@ -1210,6 +1273,10 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump
>>> *dump,
>>> continue;
>>> }
>>>
>>> + if (!get_ufid_adjust_stats(ufid, &adjust_stats)) {
>>> + netdev_tc_adjust_stats(stats, &adjust_stats);
>>> + }
>>> +
>>> match->wc.masks.in_port.odp_port = u32_to_odp(UINT32_MAX);
>>> match->flow.in_port.odp_port = dump->port;
>>> match_set_recirc_id(match, id.chain);
>>> @@ -2058,6 +2125,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct
>>> match *match,
>>> struct flow *mask = &match->wc.masks;
>>> const struct flow_tnl *tnl = &match->flow.tunnel;
>>> struct flow_tnl *tnl_mask = &mask->tunnel;
>>> + struct dpif_flow_stats adjust_stats;
>>> bool recirc_act = false;
>>> uint32_t block_id = 0;
>>> struct tcf_id id;
>>> @@ -2351,10 +2419,12 @@ netdev_tc_flow_put(struct netdev *netdev, struct
>>> match *match,
>>> return EOPNOTSUPP;
>>> }
>>>
>>> + memset(&adjust_stats, 0, sizeof adjust_stats);
>>> if (get_ufid_tc_mapping(ufid, &id) == 0) {
>>> VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d",
>>> id.handle, id.prio);
>>> - info->tc_modify_flow_deleted = !del_filter_and_ufid_mapping(&id,
>>> ufid);
>>> + info->tc_modify_flow_deleted = !del_filter_and_ufid_mapping(
>>> + &id, ufid, &adjust_stats);
>>> }
>>>
>>> prio = get_prio_for_tc_flower(&flower);
>>> @@ -2372,8 +2442,9 @@ netdev_tc_flow_put(struct netdev *netdev, struct
>>> match *match,
>>> if (!err) {
>>> if (stats) {
>>> memset(stats, 0, sizeof *stats);
>>> + netdev_tc_adjust_stats(stats, &adjust_stats);
>>> }
>>> - add_ufid_tc_mapping(netdev, ufid, &id);
>>> + add_ufid_tc_mapping(netdev, ufid, &id, &adjust_stats);
>>> }
>>>
>>> return err;
>>> @@ -2414,6 +2485,13 @@ netdev_tc_flow_get(struct netdev *netdev,
>>> parse_tc_flower_to_match(netdev, &flower, match, actions,
>>> stats, attrs, buf, false);
>>>
>>> + if (stats) {
>>> + struct dpif_flow_stats adjust_stats;
>>> +
>>> + if (!get_ufid_adjust_stats(ufid, &adjust_stats)) {
>>> + netdev_tc_adjust_stats(stats, &adjust_stats);
>>> + }
>>> + }
>>> match->wc.masks.in_port.odp_port = u32_to_odp(UINT32_MAX);
>>> match->flow.in_port.odp_port = in_port;
>>> match_set_recirc_id(match, id.chain);
>>> @@ -2426,7 +2504,6 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
>>> const ovs_u128 *ufid,
>>> struct dpif_flow_stats *stats)
>>> {
>>> - struct tc_flower flower;
>>> struct tcf_id id;
>>> int error;
>>>
>>> @@ -2435,16 +2512,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
>>> return error;
>>> }
>>>
>>> - if (stats) {
>>> - memset(stats, 0, sizeof *stats);
>>> - if (!tc_get_flower(&id, &flower)) {
>>> - parse_tc_flower_to_stats(&flower, stats);
>>> - }
>>> - }
>>> -
>>> - error = del_filter_and_ufid_mapping(&id, ufid);
>>> -
>>> - return error;
>>> + return del_filter_and_ufid_mapping(&id, ufid, stats);
>>> }
>>>
>>> static int
>>> diff --git a/lib/tc.h b/lib/tc.h
>>> index a828fd3e3..404726f14 100644
>>> --- a/lib/tc.h
>>> +++ b/lib/tc.h
>>> @@ -343,7 +343,6 @@ static inline bool
>>> is_tcf_id_eq(struct tcf_id *id1, struct tcf_id *id2)
>>> {
>>> return id1->prio == id2->prio
>>> - && id1->handle == id2->handle
>>> && id1->handle == id2->handle
>>> && id1->hook == id2->hook
>>> && id1->block_id == id2->block_id
>>> diff --git a/tests/system-offloads-traffic.at
>>> b/tests/system-offloads-traffic.at
>>> index 1a6057080..6532e439e 100644
>>> --- a/tests/system-offloads-traffic.at
>>> +++ b/tests/system-offloads-traffic.at
>>> @@ -395,7 +395,7 @@ AT_CHECK([cat p4.pcap | awk 'NF{print $NF}' | uniq -c |
>>> awk '{$1=$1;print}'], [0
>>> # This test verifies the total packet counters work when individual
>>> branches
>>> # are taken.
>>>
>>> -AT_CHECK([ovs-appctl revalidator/wait], [0])
>>> +AT_CHECK([ovs-appctl revalidator/purge], [0])
>>> AT_CHECK([ovs-ofctl del-flows br0])
>>> AT_DATA([flows.txt], [dnl
>>> table=0,in_port=2 actions=output:1
>>> @@ -416,8 +416,8 @@ NS_CHECK_EXEC([at_ns1], [ping -q -c 10 -i 0.1 -w 2 -s
>>> 1024 10.1.1.2 | FORMAT_PIN
>>> ], [], [ovs-appctl dpctl/dump-flows; ovs-ofctl dump-flows br0])
>>>
>>> AT_CHECK([ovs-appctl dpctl/dump-flows | grep "eth_type(0x0800)" |
>>> DUMP_CLEAN_SORTED | sed 's/bytes:11440/bytes:11720/'], [0], [dnl
>>
>> This check has a bytes counter adjustment for older kernels.
>> The numbers are liely incorrect now.
>> Also, is this change adjustment necessary? You seem to not follow
>> this pattern in the newly introduced tests.
>
> Yes, I messed this up, I should have changed this line, instead of the one
> below :(
>
> I kept this pattern in because if Nvidia/Kernel folks ever fix the counters
> ;) I will also add this to the new test (I was planning on doing this when
> the tests patch gets in, but might as well do it now ;)
>
>>> -in_port(2),eth(),eth_type(0x0800),ipv4(frag=no), packets:20, bytes:11720,
>>> used:0.001s, actions:check_pkt_len(size=200,gt(3),le(3))
>>> -in_port(3),eth(),eth_type(0x0800),ipv4(frag=no), packets:20, bytes:11720,
>>> used:0.001s, actions:output
>>> +in_port(2),eth(),eth_type(0x0800),ipv4(frag=no), packets:19, bytes:11348,
>>> used:0.001s, actions:check_pkt_len(size=200,gt(3),le(3))
>>> +in_port(3),eth(),eth_type(0x0800),ipv4(frag=no), packets:19, bytes:11348,
>>> used:0.001s, actions:output
>>> ])
>>>
>>>
>>> @@ -490,7 +490,7 @@ NS_CHECK_EXEC([at_ns1], [ping -q -c 10 -i 0.1 -w 2 -s
>>> 1024 10.1.1.2 | FORMAT_PIN
>>> OVS_CHECK_ACTIONS([check_pkt_len(size=200,gt(3,5),le(3,4))])
>>>
>>>
>>> -AT_CHECK([ovs-appctl revalidator/wait], [0])
>>> +AT_CHECK([ovs-appctl revalidator/purge], [0])
>>> AT_CHECK([ovs-ofctl del-flows br0])
>>> AT_DATA([flows.txt], [dnl
>>> table=0,in_port=2 actions=output:1
>>> @@ -517,7 +517,7 @@ NS_CHECK_EXEC([at_ns1], [ping -q -c 10 -i 0.1 -w 2 -s
>>> 1024 10.1.1.2 | FORMAT_PIN
>>>
>>> AT_CHECK([ovs-appctl dpctl/dump-flows type=tc,offloaded | grep
>>> "eth_type(0x0800)" | DUMP_CLEAN_SORTED | sed -e
>>> 's/bytes:11348/bytes:11614/' -e 's/bytes:11440/bytes:11720/'], [0], [dnl
>>
>> Same here.
>
> See above.
>
>
> Let me know if you need to discuss more, if not I’ll try to send a v4
> tomorrow.
>
> //Eelco
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev