On 19 Dec 2024, at 14:42, Adrián Moreno wrote:

> On Tue, Dec 17, 2024 at 04:21:52PM +0100, Eelco Chaudron wrote:
>> This patch installs a kernel return probe on ovs_dp_upcall() to record
>> all successful and failed calls per CPU. The statistics are reported
>> when the script exits, providing insights into the upcall behavior.
>>
>> This is an example output:
>>
>>   # UPCALL STATISTICS (TOTAL [CPU_ID: UPCALL_PER_CPU, ...]):
>>     Total upcalls       : 183247 [0: 8937, 2: 14464, 4: 10372, 6: 4254, ...]
>>     Successfull upcalls : 120195 [0: 5945, 2: 6379, 4: 5966, 6: 4254, ...]
>>     Failed upcalls      : 63052 [0: 2992, 2: 8085, 4: 4406, 8: 2275, ...]
>>        11, EAGAIN       : 63052 [0: 2992, 2: 8085, 4: 4406, 8: 2275, ...]
>>
>> Signed-off-by: Eelco Chaudron <[email protected]>
>>
>> ---
>>  v3: - Fixed flake8 error.
>>  v2: - Cleanup and use per-CPU hash vs per-CPU array for counters.
>> ---
>>  utilities/automake.mk                   |    1
>>  utilities/usdt-scripts/kernel_delay.py  |  132 
>> +++++++++++++++++++++++++++++++
>>  utilities/usdt-scripts/kernel_delay.rst |    9 ++
>>  3 files changed, 141 insertions(+), 1 deletion(-)
>>
>> diff --git a/utilities/automake.mk b/utilities/automake.mk
>> index acc1af4e0..22260b254 100644
>> --- a/utilities/automake.mk
>> +++ b/utilities/automake.mk
>> @@ -149,6 +149,7 @@ FLAKE8_PYFILES += utilities/ovs-pcap.in \
>>      utilities/ovs-pipegen.py \
>>      utilities/usdt-scripts/dpif_op_nl_monitor.py \
>>      utilities/usdt-scripts/flow_reval_monitor.py \
>> +    utilities/usdt-scripts/kernel_delay.py \
>>      utilities/usdt-scripts/upcall_monitor.py \
>>      utilities/usdt-scripts/upcall_cost.py
>>
>> diff --git a/utilities/usdt-scripts/kernel_delay.py 
>> b/utilities/usdt-scripts/kernel_delay.py
>> index de6b0c9de..9caf4bd62 100755
>> --- a/utilities/usdt-scripts/kernel_delay.py
>> +++ b/utilities/usdt-scripts/kernel_delay.py
>> @@ -39,6 +39,7 @@
>>  #
>>  import argparse
>>  import datetime
>> +import errno
>>  import os
>>  import pytz
>>  import psutil
>> @@ -556,6 +557,36 @@ TRACEPOINT_PROBE(irq, softirq_exit)
>>      data->start_ns = 0;
>>      return 0;
>>  }
>> +
>> +
>> +/*
>> + * For measuring upcall statistics (per CPU).
>> + */
>> +BPF_PERCPU_HASH(upcall_count);
>> +
>> +#if <INSTALL_OVS_DP_UPCALL_PROBE>
>> +int kretprobe__ovs_dp_upcall(struct pt_regs *ctx)
>> +{
>> +    int ret = PT_REGS_RC(ctx);
>> +    u64 zero = 0;
>> +    u64 *entry;
>> +    u64 key;
>> +
>> +    if (!capture_enabled__())
>> +        return 0;
>> +
>> +    if (ret >= 0)
>> +        key = 0;
>> +    else
>> +        key = -ret;
>> +
>> +    entry = upcall_count.lookup_or_try_init(&key, &zero);
>> +    if (entry)
>> +        *entry += 1;
>> +
>> +    return 0;
>> +}
>> +#endif /* For measuring upcall statistics/errors. */
>>  """
>>
>>
>> @@ -887,6 +918,7 @@ def reset_capture():
>>      bpf["stack_traces"].clear()
>>      bpf["stop_start"].clear()
>>      bpf["stop_data"].clear()
>> +    bpf["upcall_count"].clear()
>>
>>
>>  #
>> @@ -900,6 +932,93 @@ def print_timestamp(msg):
>>      print(time_string)
>>
>>
>> +#
>> +# Get errno short string
>> +#
>> +def get_errno_short(err):
>> +    try:
>> +        return errno.errorcode[err]
>> +    except KeyError:
>> +        return "_unknown_"
>> +
>> +
>> +#
>> +# Format a eBPF per-cpu array entry (if the count is > 0)
>> +#
>> +def format_per_cpu_array(cpu_array, key=None, skip_key=None):
>
> nit: it's no longer an array.

Good catch, changed it to hash.
>> +
>> +    if key is not None:
>> +        total = cpu_array.sum(key).value
>> +        if total > 0:
>> +            for cpu, value in enumerate(cpu_array.getvalue(key)):
>> +                if value == 0:
>> +                    continue
>> +
>> +                per_cpu += " {}: {},".format(cpu, value)
>> +    else:
>> +        total = 0
>> +        total_cpu = None
>> +
>> +        for key in cpu_array.keys():
>> +            if skip_key is not None and skip_key.value == key.value:
>> +                continue
>> +
>> +            if total_cpu is None:
>> +                total_cpu = [0] * len(cpu_array.getvalue(key))
>> +
>> +            for cpu, value in enumerate(cpu_array.getvalue(key)):
>> +                total_cpu[cpu] += value
>> +                total += value
>> +
>> +        if total >= 0:
>> +            for cpu, value in enumerate(total_cpu):
>
> Doing some further testing I got this:
>
> Traceback (most recent call last):
>   File "/home/amorenoz/src/ovs/utilities/usdt-scripts/./kernel_delay.py",
> line 1612, in <module>
>     main()
>   File "/home/amorenoz/src/ovs/utilities/usdt-scripts/./kernel_delay.py",
> line 1571, in main
>     process_results(syscall_events=syscall_events,
>   File "/home/amorenoz/src/ovs/utilities/usdt-scripts/./kernel_delay.py",
> line 1198, in process_results
>     display_upcall_results()
>   File "/home/amorenoz/src/ovs/utilities/usdt-scripts/./kernel_delay.py",
> line 1014, in display_upcall_results
>     total, per_cpu = format_per_cpu_array(upcalls, skip_key=k)
>                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>   File "/home/amorenoz/src/ovs/utilities/usdt-scripts/./kernel_delay.py",
> line 975, in format_per_cpu_array
>     for cpu, value in enumerate(total_cpu):
>                       ^^^^^^^^^^^^^^^^^^^^
> TypeError: 'NoneType' object is not iterable
>
> In my test all upcalls where successful, so when trying to list Failed
> upcalls, "skip_key" is 0, there are no more keys in "cpu_array.keys()"
> and "total_cpu" is never set.

Forgot to test v3 with no errors, did test it with random errors ;)

This is fixed in v4, also cleaned up the output, only displaying the total in 
case of no errors.

>> +                if value == 0:
>> +                    continue
>> +
>> +                per_cpu += " {}: {},".format(cpu, value)
>> +
>> +    return total, per_cpu.strip(", ")
>> +
>> +
>> +#
>> +# Display kernel upcall statistics
>> +#
>> +def display_upcall_results():
>> +    upcalls = bpf["upcall_count"]
>> +    have_upcalls = False
>> +
>> +    for k in upcalls:
>> +        if upcalls.sum(k).value == 0:
>> +            continue
>> +        have_upcalls = True
>> +        break
>> +
>> +    if not have_upcalls:
>> +        return
>> +
>> +    print("\n\n# UPCALL STATISTICS (TOTAL [CPU_ID: UPCALL_PER_CPU, ...]):\n"
>
> nit (only because I guess a new version is required and acknowledging
> this was my proposal):
> s/UPCALL_PER_CPU/N_UPCALLS_PER_CPU/?

Done. A v4 was sent out.

>> +        "  Total upcalls       : {} [{}]".format(
>> +            *format_per_cpu_array(upcalls)))
>> +
>> +    for k in sorted(upcalls, key=lambda x: int(x.value)):
>> +        error = k.value
>> +        total, per_cpu = format_per_cpu_array(upcalls, key=k)
>> +
>> +        if error != 0 and total == 0:
>> +            continue
>> +
>> +        if error == 0:
>> +            print("  Successfull upcalls : {} [{}]".format(total, per_cpu))
>> +
>> +            total, per_cpu = format_per_cpu_array(upcalls, skip_key=k)
>> +            print("  Failed upcalls      : {} [{}]".format(total, per_cpu))
>> +        else:
>> +            print("    {:3}, {:13}: {} [{}]".format(error,
>> +                                                    get_errno_short(error),
>> +                                                    total, per_cpu))
>> +
>> +
>>  #
>>  # process_results()
>>  #
>> @@ -1074,7 +1193,12 @@ def process_results(syscall_events=None, 
>> trigger_delta=None):
>>                  indent, "TOTAL:", "", total_count, total_ns))
>>
>>      #
>> -    # Print events
>> +    # Print upcall statistics
>> +    #
>> +    display_upcall_results()
>> +
>> +    #
>> +    # Print syscall events
>>      #
>>      lost_stack_traces = 0
>>      if syscall_events:
>> @@ -1194,6 +1318,9 @@ def main():
>>      parser.add_argument("--skip-syscall-poll-events",
>>                          help="Skip poll() syscalls with --syscall-events",
>>                          action="store_true")
>> +    parser.add_argument("--skip-upcall-stats",
>> +                        help="Skip the collection of upcall statistics",
>> +                        action="store_true")
>>      parser.add_argument("--stack-trace-size",
>>                          help="Number of unique stack traces that can be "
>>                          "recorded, default 4096. 0 to disable",
>> @@ -1298,6 +1425,9 @@ def main():
>>      source = source.replace("<STACK_TRACE_ENABLED>", "true"
>>                              if options.stack_trace_size > 0 else "false")
>>
>> +    source = source.replace("<INSTALL_OVS_DP_UPCALL_PROBE>", "0"
>> +                            if options.skip_upcall_stats else "1")
>> +
>>      #
>>      # Handle start/stop probes
>>      #
>> diff --git a/utilities/usdt-scripts/kernel_delay.rst 
>> b/utilities/usdt-scripts/kernel_delay.rst
>> index 0f6f916a7..41620d760 100644
>> --- a/utilities/usdt-scripts/kernel_delay.rst
>> +++ b/utilities/usdt-scripts/kernel_delay.rst
>> @@ -109,6 +109,7 @@ followed by resource-specific data. Which are:
>>  - ``THREAD STOPPED STATISTICS``
>>  - ``HARD IRQ STATISTICS``
>>  - ``SOFT IRQ STATISTICS``
>> +- ``UPCALL STATISTICS``
>>
>>  The following sections will describe in detail what statistics they report.
>>
>> @@ -187,6 +188,14 @@ number of interrupts (``COUNT``), the total time spent 
>> in the interrupt
>>  handler (``TOTAL ns``), and the worst-case duration (``MAX ns``).
>>
>>
>> +``UPCALL STATISTICS``
>> +~~~~~~~~~~~~~~~~~~~~~
>> +The ``UPCALL STATISTICS`` section provides information on the number of
>> +upcalls sent by the kernel to userspace. If any upcalls fail to be sent,
>> +the specific return codes are recorded. Statistics are presented both as
>> +a total and on a per-CPU basis.
>> +
>> +
>>  The ``--syscall-events`` option
>>  -------------------------------
>>  In addition to reporting global syscall statistics in 
>> ``SYSCALL_STATISTICS``,
>>

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to