On Tue, Nov 15, 2022 at 2:04 AM TATSUKAWA KOSUKE(立川 江介)
<[email protected]> wrote:
>
> Hi,
>
> I was using nfsslower in bcc tools and noticed that it doesn't print out
> slow commit operations, which sometimes cause long delays in NFS writes.
> I tried adding code to trace slow commit operations and created the
> attached patch.

The concept and code looks okay to me. One thing is you could use
RAW_TRACEPOINT_PROBE macro to be consistent with some other
bcc tools raw tracepoint usages.

Please submit a pull request.

>
> Please let me hear your opinions.
>
> NFS commits are quite slow and are not performed often, so the patch
> shouldn't incur visible overhead.
>
> Below is a sample output from the modified tool when writing a large
> file using dd command.  Most commits are issued from kworker threads,
> so the commit output may be mostly dropped when filtered by PID.
>
> TIME     COMM           PID    T BYTES   OFF_KB   LAT(ms) FILENAME
> ...
> 16:46:11 dd             1212   W 1048576 651264     46.58 testfile
> 16:46:11 dd             1212   W 1048576 653312     54.41 testfile
> 16:46:11 dd             1212   W 1048576 654336     18.96 testfile
> 16:46:11 dd             1212   W 1048576 655360     49.05 testfile
> 16:46:11 dd             1212   W 1048576 657408     82.96 testfile
> 16:46:11 dd             1212   W 1048576 659456    109.25 testfile
> 16:46:12 dd             1212   W 1048576 660480    163.55 testfile
> 16:46:12 dd             1212   W 1048576 662528    205.44 testfile
> 16:46:13 dd             1212   W 1048576 663552    751.02 testfile
> 16:46:37 kworker/u8:5   1207   C 0       0        27449.05 testfile
> 16:46:37 kworker/u8:5   1207   C 0       0        26725.16 testfile
> 16:46:37 kworker/u8:5   1207   C 0       0        27592.04 testfile
> 16:46:37 kworker/u8:4   1206   C 0       0        22188.75 testfile
> 16:46:37 kworker/u8:4   1206   C 0       0        26092.59 testfile
> 16:46:37 kworker/u8:4   1206   C 0       0        27268.90 testfile
> 16:46:37 kworker/u8:4   1206   C 0       0        22303.24 testfile
> 16:46:37 kworker/u8:4   1206   C 0       0        27081.34 testfile
> 16:46:37 dd             1212   W 1048576 664576   24337.80 testfile
> 16:46:38 dd             1212   W 1048576 958464     61.77 testfile
> 16:46:38 dd             1212   W 1048576 960512     56.60 testfile
> 16:46:38 dd             1212   W 1048576 963584     55.75 testfile
> 16:46:38 dd             1212   W 1048576 965632     54.84 testfile
> ...
>
> Best regards.
> ---
> Kosuke TATSUKAWA  | 1st Platform Software Division
>                   | NEC Solution Innovators
>                   | [email protected]
>
> ------------------------------------------------------------------------
> diff -rup a/tools/nfsslower.py b/tools/nfsslower.py
> --- a/tools/nfsslower   2022-11-14 12:39:23.677073240 +0900
> +++ b/tools/nfsslower   2022-11-14 14:44:49.960073240 +0900
> @@ -9,6 +9,8 @@
>  # This script traces some common NFS operations: read, write, opens and
>  # getattr. It measures the time spent in these operations, and prints details
>  # for each that exceeded a threshold.
> +# The script also traces commit operations, which is specific to nfs and 
> could
> +# be pretty slow.
>  #
>  # WARNING: This adds low-overhead instrumentation to these NFS operations,
>  # including reads and writes from the file system cache. Such reads and 
> writes
> @@ -25,6 +27,8 @@
>  # Currently there aren't any tracepoints available for nfs_read_file,
>  # nfs_write_file and nfs_open_file, nfs_getattr does have entry and exit
>  # tracepoints but we chose to use kprobes for consistency
> +# Raw tracepoints are used to trace nfs:nfs_initiate_commit and
> +# nfs:nfs_commit_done.
>  #
>  # 31-Aug-2017   Samuel Nair created this. Should work with NFSv{3,4}
>
> @@ -41,8 +45,8 @@ examples = """
>      ./nfsslower -p 121  # trace pid 121 only
>  """
>  parser = argparse.ArgumentParser(
> -    description="""Trace READ, WRITE, OPEN \
> -and GETATTR NFS calls slower than a threshold,\
> +    description="""Trace READ, WRITE, OPEN, GETATTR \
> +and COMMIT NFS calls slower than a threshold,\
>  supports NFSv{3,4}""",
>      formatter_class=argparse.RawDescriptionHelpFormatter,
>      epilog=examples)
> @@ -66,11 +70,13 @@ bpf_text = """
>  #include <linux/fs.h>
>  #include <linux/sched.h>
>  #include <linux/dcache.h>
> +#include <linux/nfs_fs.h>
>
>  #define TRACE_READ 0
>  #define TRACE_WRITE 1
>  #define TRACE_OPEN 2
>  #define TRACE_GETATTR 3
> +#define TRACE_COMMIT 4
>
>  struct val_t {
>      u64 ts;
> @@ -79,6 +85,12 @@ struct val_t {
>      struct dentry *d;
>  };
>
> +struct commit_t {
> +    u64 ts;
> +    u64 offset;
> +    u64 count;
> +};
> +
>  struct data_t {
>      // XXX: switch some to u32's when supported
>      u64 ts_us;
> @@ -93,6 +105,7 @@ struct data_t {
>
>  BPF_HASH(entryinfo, u64, struct val_t);
>  BPF_PERF_OUTPUT(events);
> +BPF_HASH(commitinfo, u64, struct commit_t);
>
>  int trace_rw_entry(struct pt_regs *ctx, struct kiocb *iocb,
>                                  struct iov_iter *data)
> @@ -227,6 +240,56 @@ int trace_getattr_return(struct pt_regs
>      return trace_exit(ctx, TRACE_GETATTR);
>  }
>
> +int raw_nfs_initiate_commit(struct bpf_raw_tracepoint_args *ctx) {
> +    u64 key = (u64)ctx->args[0];
> +    struct nfs_commit_data *cd = (struct nfs_commit_data *)key;
> +    struct commit_t c = { 0 };
> +
> +    c.ts = bpf_ktime_get_ns();
> +    bpf_probe_read_kernel(&c.offset, sizeof(cd->args.offset),
> +        &cd->args.offset);
> +    bpf_probe_read_kernel(&c.count, sizeof(cd->args.count), &cd->args.count);
> +    commitinfo.update(&key, &c);
> +    return 0;
> +}
> +
> +int raw_nfs_commit_done(struct bpf_raw_tracepoint_args *ctx) {
> +    u64 key = (u64)ctx->args[1];
> +    struct commit_t *cp = commitinfo.lookup(&key);
> +
> +    if (cp) {
> +        struct nfs_commit_data *cd = (struct nfs_commit_data *)key;
> +        struct nfs_open_context *p;
> +        struct dentry *de;
> +        struct qstr qs;
> +        u64 ts = bpf_ktime_get_ns();
> +        u64 delta_us = (ts - cp->ts) / 1000;
> +        u32 pid = bpf_get_current_pid_tgid() >> 32;
> +        struct data_t data = {.type = TRACE_COMMIT, .offset = cp->offset,
> +            .size = cp->count, .ts_us = ts/1000, .delta_us = delta_us,
> +            .pid = pid};
> +
> +        commitinfo.delete(&key);
> +        bpf_get_current_comm(&data.task, sizeof(data.task));
> +
> +        if(FILTER_PID)
> +            return 0;
> +
> +        if (FILTER_US)
> +            return 0;
> +
> +        bpf_probe_read_kernel(&p, sizeof(p), &cd->context);
> +        bpf_probe_read_kernel(&de, sizeof(de), &p->dentry);
> +        bpf_probe_read_kernel(&qs, sizeof(qs), &de->d_name);
> +        if (qs.len) {
> +            bpf_probe_read_kernel(&data.file, sizeof(data.file),
> +                (void *)qs.name);
> +            events.perf_submit(ctx, &data, sizeof(data));
> +        }
> +    }
> +    return 0;
> +}
> +
>  """
>  if min_ms == 0:
>      bpf_text = bpf_text.replace('FILTER_US', '0')
> @@ -253,6 +316,8 @@ def print_event(cpu, data, size):
>          type = 'O'
>      elif event.type == 3:
>          type = 'G'
> +    elif event.type == 4:
> +        type = 'C'
>
>      if(csv):
>          print("%d,%s,%d,%s,%d,%d,%d,%s" % (
> @@ -273,7 +338,18 @@ def print_event(cpu, data, size):
>  # Currently specifically works for NFSv4, the other kprobes are generic
>  # so it should work with earlier NFS versions
>
> -b = BPF(text=bpf_text)
> +# The following warning is shown on kernels after linux-5.18 when using bcc.
> +# Add compile option to silence it.
> +#   In file included from /virtual/main.c:7:
> +#   In file included from include/linux/nfs_fs.h:31:
> +#   In file included from include/linux/sunrpc/auth.h:13:
> +#   In file included from include/linux/sunrpc/sched.h:19:
> +#   include/linux/sunrpc/xdr.h:751:10: warning: result of comparison of 
> constant 4611686018427387903 with expression of type '__u32' (aka 'unsigned 
> int') is always false [-Wtautological-constant-out-of-range-compare]
> +#           if (len > SIZE_MAX / sizeof(*p))
> +#               ~~~ ^ ~~~~~~~~~~~~~~~~~~~~~
> +#   1 warning generated.
> +b = BPF(text=bpf_text,
> +    cflags=["-Wno-tautological-constant-out-of-range-compare"])
>  b.attach_kprobe(event="nfs_file_read", fn_name="trace_rw_entry")
>  b.attach_kprobe(event="nfs_file_write", fn_name="trace_rw_entry")
>  b.attach_kprobe(event="nfs4_file_open", fn_name="trace_file_open_entry")
> @@ -286,6 +362,9 @@ b.attach_kretprobe(event="nfs4_file_open
>  b.attach_kretprobe(event="nfs_file_open", fn_name="trace_file_open_return")
>  b.attach_kretprobe(event="nfs_getattr", fn_name="trace_getattr_return")
>
> +b.attach_raw_tracepoint("nfs_initiate_commit", "raw_nfs_initiate_commit")
> +b.attach_raw_tracepoint("nfs_commit_done", "raw_nfs_commit_done")
> +
>  if(csv):
>      print("ENDTIME_us,TASK,PID,TYPE,BYTES,OFFSET_b,LATENCY_us,FILE")
>  else:
>
> 
>
>


-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#2024): https://lists.iovisor.org/g/iovisor-dev/message/2024
Mute This Topic: https://lists.iovisor.org/mt/95039760/21656
Group Owner: [email protected]
Unsubscribe: https://lists.iovisor.org/g/iovisor-dev/unsub 
[[email protected]]
-=-=-=-=-=-=-=-=-=-=-=-


Reply via email to