Complements v2.6.31 commit 55782138e47d ("tracing/events: convert block
trace points to TRACE_EVENT()") so that the (un)plug trace events are
equivalent to traditional blktrace output. This also allows event
filtering, so that one does not always get all (un)plug events.

NB: The NULL pointer check for q->kobj.parent is certainly racy, and
I don't have enough experience to judge whether it's good enough for a
trace event. The change did work for my cases (block device read/write
I/O on zfcp-attached SCSI disks and dm-mpath on top).

While I haven't seen any prior art using driver core (parent) relations
for trace events, there are other cases using this when no direct pointer
exists between objects, such as:
 #define to_scsi_target(d)      container_of(d, struct scsi_target, dev)
 static inline struct scsi_target *scsi_target(struct scsi_device *sdev)
 {
        return to_scsi_target(sdev->sdev_gendev.parent);
 }

This is the object model we make use of here:

struct gendisk {
        struct hd_struct {
                struct device {      /*container_of*/
                        struct kobject kobj; <--+
                        dev_t  devt; /*deref*/  |
                } __dev;                        |
        } part0;                                |
        struct request_queue *queue; ..+        |
}                                      :        |
                                       :        |
struct request_queue {  <..............+        |
        /* queue kobject */                     |
        struct kobject {                        |
                struct kobject *parent; --------+
        } kobj;
}

The parent pointer comes from:
 #define disk_to_dev(disk)      (&(disk)->part0.__dev)
int blk_register_queue(struct gendisk *disk)
        struct device *dev = disk_to_dev(disk);
        struct request_queue *q = disk->queue;
        ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue");
                                    ^^^parent

$ ls -d /sys/block/sdf/queue
/sys/block/sdf/queue
$ cat /sys/block/sdf/dev
8:80

A partition does not have its own request queue:

$ cat /sys/block/sdf/sdf1/dev
8:81
$ ls -d /sys/block/sdf/sdf1/queue
ls: cannot access '/sys/block/sdf/sdf1/queue': No such file or directory

The difference from blktrace's parsed output is that the block trace events
don't use the partition's minor number but that of the containing block device:

$ dd if=/dev/sdf1 count=1

$ cat /sys/kernel/debug/tracing/trace
block_bio_remap: 8,80 R 2048 + 32 <- (8,81) 0
block_bio_queue: 8,80 R 2048 + 32 [dd]
block_getrq: 8,80 R 2048 + 32 [dd]
block_plug: 8,80 [dd]
            ^^^^
block_rq_insert: 8,80 R 16384 () 2048 + 32 [dd]
block_unplug: 8,80 [dd] 1 explicit
              ^^^^
block_rq_issue: 8,80 R 16384 () 2048 + 32 [dd]
block_rq_complete: 8,80 R () 2048 + 32 [0]

$ btrace /dev/sdf1
  8,80   1        1     0.000000000 240240  A   R 2048 + 32 <- (8,81) 0
  8,81   1        2     0.000220890 240240  Q   R 2048 + 32 [dd]
  8,81   1        3     0.000229639 240240  G   R 2048 + 32 [dd]
  8,81   1        4     0.000231805 240240  P   N [dd]
    ^^
  8,81   1        5     0.000234671 240240  I   R 2048 + 32 [dd]
  8,81   1        6     0.000236365 240240  U   N [dd] 1
    ^^
  8,81   1        7     0.000238527 240240  D   R 2048 + 32 [dd]
  8,81   2        2     0.000613741     0  C   R 2048 + 32 [0]

Signed-off-by: Steffen Maier <ma...@linux.ibm.com>
---
 include/trace/events/block.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index a13613d27cee..cffedc26e8a3 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -460,14 +460,18 @@ TRACE_EVENT(block_plug,
        TP_ARGS(q),
 
        TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
                __array( char,          comm,   TASK_COMM_LEN   )
        ),
 
        TP_fast_assign(
+               __entry->dev = q->kobj.parent ?
+               container_of(q->kobj.parent, struct device, kobj)->devt : 0;
                memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
        ),
 
-       TP_printk("[%s]", __entry->comm)
+       TP_printk("%d,%d [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->comm)
 );
 
 #define show_block_unplug_explicit(val)                \
@@ -482,18 +486,23 @@ DECLARE_EVENT_CLASS(block_unplug,
        TP_ARGS(q, depth, explicit),
 
        TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
                __field( int,           nr_rq                   )
                __field( bool,          explicit                )
                __array( char,          comm,   TASK_COMM_LEN   )
        ),
 
        TP_fast_assign(
+               __entry->dev   = q->kobj.parent ?
+               container_of(q->kobj.parent, struct device, kobj)->devt : 0;
                __entry->nr_rq = depth;
                __entry->explicit = explicit;
                memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
        ),
 
-       TP_printk("[%s] %d %s", __entry->comm, __entry->nr_rq,
+       TP_printk("%d,%d [%s] %d %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->comm, __entry->nr_rq,
                  show_block_unplug_explicit(__entry->explicit))
 );
 
-- 
2.13.5

Reply via email to