Re: [RFC PATCH 4/9] surface_aggregator: Add trace points

2020-09-23 Thread Maximilian Luz

On 9/23/20 10:07 PM, Steven Rostedt wrote:

On Wed, 23 Sep 2020 17:15:06 +0200
Maximilian Luz  wrote:


Add trace points to the Surface Aggregator subsystem core. These trace
points can be used to track packets, requests, and allocations. They are
further intended for debugging and testing/validation, specifically in
combination with the error injection capabilities introduced in the
subsequent commit.


I'm impressed! This uses some of the advanced features of the tracing
infrastructure. But I still have some comments to make about the layout
of the TP_STRUCT__entry() fields.


Thanks!

[...]


+DECLARE_EVENT_CLASS(ssam_packet_class,
+   TP_PROTO(const struct ssh_packet *packet),
+
+   TP_ARGS(packet),
+
+   TP_STRUCT__entry(
+   __array(char, uid, SSAM_PTR_UID_LEN)
+   __field(u8, priority)
+   __field(u16, length)
+   __field(unsigned long, state)
+   __field(u16, seq)



Order matters above to keep the events as compact as possible. The more
compact they are, the more events you can store without loss.

Now with SSAM_PTR_UID_LEN = 9, the above is (on a 64 bit system);

9 bytes;
1 byte;
2 bytes;
8 bytes;
2 bytes;

The ftrace ring buffer is 4 byte aligned. As words and long words are
also 4 byte aligned, there's not much different to change. But it is
possible that the compiler might add 4 byte padding between the long
word "length" and "priority". Note, these are not packed structures.

Testing this out with the following code:

  $ cat << EOF > test.c
struct test {
unsigned char array[9];
unsigned char priority;
unsigned short length;
unsigned long state;
unsigned short seq;
};

static struct test x;

void receive_x(struct test *p)
{
p = 
}
EOF

  $ gcc -g -c -o test.o test.c
  $ pahole test.o
struct test {
unsigned char  array[9]; /* 0 9 */
unsigned char  priority; /* 9 1 */
short unsigned int length;   /*10 2 */

/* XXX 4 bytes hole, try to pack */

long unsigned int  state;/*16 8 */
short unsigned int seq;  /*24 2 */

/* size: 32, cachelines: 1, members: 5 */
/* sum members: 22, holes: 1, sum holes: 4 */
/* padding: 6 */
/* last cacheline: 32 bytes */
};

You do see a hole between length and state. Now if we were to move this
around a little.

  $ cat < test2.c
struct test {
unsigned long state;
unsigned char array[9];
unsigned char priority;
unsigned short length;
unsigned short seq;
};

static struct test x;

void receive_x(struct test *p)
{
p = 
}
EOF

  $ gcc -g -c -o test2 test2.c
  $ pahole test2.o
struct test {
long unsigned int  state;/* 0 8 */
unsigned char  array[9]; /* 8 9 */
unsigned char  priority; /*17 1 */
short unsigned int length;   /*18 2 */
short unsigned int seq;  /*20 2 */

/* size: 24, cachelines: 1, members: 5 */
/* padding: 2 */
/* last cacheline: 24 bytes */
};


We get a more compact structure with:

TP_STRUCT__entry(
__field(unsigned long, state)
__array(char, uid, SSAM_PTR_UID_LEN)
__field(u8, priority)
__field(u16, length)
__field(u16, seq)
),


Note, you can find pahole here:

https://git.kernel.org/pub/scm/devel/pahole/pahole.git



+   ),


Thank you for that detailed write-up! As you have clearly noticed, I
have not really looked at the struct layouts. I will fix this for v2,
include your changes, and have a look at pahole.

[...]

Thanks,
Max


Re: [RFC PATCH 4/9] surface_aggregator: Add trace points

2020-09-23 Thread Steven Rostedt
On Wed, 23 Sep 2020 17:15:06 +0200
Maximilian Luz  wrote:

> Add trace points to the Surface Aggregator subsystem core. These trace
> points can be used to track packets, requests, and allocations. They are
> further intended for debugging and testing/validation, specifically in
> combination with the error injection capabilities introduced in the
> subsequent commit.

I'm impressed! This uses some of the advanced features of the tracing
infrastructure. But I still have some comments to make about the layout
of the TP_STRUCT__entry() fields.

> 

> diff --git a/drivers/misc/surface_aggregator/trace.h 
> b/drivers/misc/surface_aggregator/trace.h
> new file mode 100644
> index ..eb2e3e1457de
> --- /dev/null
> +++ b/drivers/misc/surface_aggregator/trace.h
> @@ -0,0 +1,612 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM surface_aggregator
> +
> +#if !defined(_SURFACE_AGGREGATOR_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _SURFACE_AGGREGATOR_TRACE_H
> +
> +#include 
> +
> +#include 
> +#include 
> +
> +
> +TRACE_DEFINE_ENUM(SSH_FRAME_TYPE_DATA_SEQ);
> +TRACE_DEFINE_ENUM(SSH_FRAME_TYPE_DATA_NSQ);
> +TRACE_DEFINE_ENUM(SSH_FRAME_TYPE_ACK);
> +TRACE_DEFINE_ENUM(SSH_FRAME_TYPE_NAK);

Kudos on using TRACE_DEFINE_ENUM :-)


> +
> +#define ssam_show_generic_u8_field(value)\
> + __print_symbolic(value, \
> + { SSAM_U8_FIELD_NOT_APPLICABLE, "N/A" } \
> + )
> +
> +
> +#define ssam_show_frame_type(ty) \
> + __print_symbolic(ty,\
> + { SSH_FRAME_TYPE_DATA_SEQ,  "DSEQ" },   \
> + { SSH_FRAME_TYPE_DATA_NSQ,  "DNSQ" },   \
> + { SSH_FRAME_TYPE_ACK,   "ACK"  },   \
> + { SSH_FRAME_TYPE_NAK,   "NAK"  }\
> + )
> +
> +#define ssam_show_packet_type(type)  \
> + __print_flags(flags & SSH_PACKET_FLAGS_TY_MASK, "", \
> + { BIT(SSH_PACKET_TY_FLUSH_BIT), "F" },  \
> + { BIT(SSH_PACKET_TY_SEQUENCED_BIT), "S" },  \
> + { BIT(SSH_PACKET_TY_BLOCKING_BIT),  "B" }   \

More kudos on proper usage of __print_symbolic() and __print_flags() :-)



> +DECLARE_EVENT_CLASS(ssam_packet_class,
> + TP_PROTO(const struct ssh_packet *packet),
> +
> + TP_ARGS(packet),
> +
> + TP_STRUCT__entry(
> + __array(char, uid, SSAM_PTR_UID_LEN)
> + __field(u8, priority)
> + __field(u16, length)
> + __field(unsigned long, state)
> + __field(u16, seq)


Order matters above to keep the events as compact as possible. The more
compact they are, the more events you can store without loss.

Now with SSAM_PTR_UID_LEN = 9, the above is (on a 64 bit system);

9 bytes;
1 byte;
2 bytes;
8 bytes;
2 bytes;

The ftrace ring buffer is 4 byte aligned. As words and long words are
also 4 byte aligned, there's not much different to change. But it is
possible that the compiler might add 4 byte padding between the long
word "length" and "priority". Note, these are not packed structures.

Testing this out with the following code:

 $ cat << EOF > test.c
struct test {
unsigned char array[9];
unsigned char priority;
unsigned short length;
unsigned long state;
unsigned short seq;
};

static struct test x;

void receive_x(struct test *p)
{
p = 
}
EOF

 $ gcc -g -c -o test.o test.c
 $ pahole test.o
struct test {
unsigned char  array[9]; /* 0 9 */
unsigned char  priority; /* 9 1 */
short unsigned int length;   /*10 2 */

/* XXX 4 bytes hole, try to pack */

long unsigned int  state;/*16 8 */
short unsigned int seq;  /*24 2 */

/* size: 32, cachelines: 1, members: 5 */
/* sum members: 22, holes: 1, sum holes: 4 */
/* padding: 6 */
/* last cacheline: 32 bytes */
};

You do see a hole between length and state. Now if we were to move this
around a little.

 $ cat < test2.c
struct test {
unsigned long state;
unsigned char array[9];
unsigned char priority;
unsigned short length;
unsigned short seq;
};

static struct test x;

void receive_x(struct test *p)
{
p = 
}
EOF

 $ gcc -g -c -o test2 test2.c
 $ pahole test2.o
struct test {
long unsigned int  state;/* 0 8 */
unsigned char  array[9]; /* 8 9 */
unsigned char  priority; /*17 1 */