在 2026-01-15 16:21:20,"Michael S. Tsirkin" <[email protected]> 写道:
>On Wed, Jan 14, 2026 at 08:36:27PM +0800, Longjun Tang wrote:
>> From: Longjun Tang <[email protected]>
>>
>> To facilitate tracking the status of virtqueue during rx/tx and
>> interrupt response, add trace point at corresponding func.
>>
>> Also,to avoid perf hured,it olny available under debug condition.
>>
>> Signed-off-by: Longjun Tang <[email protected]>
>> ---
>> v1:
>> I did some tests with iperf3, as follow:
>> host <---> vm
>>
>> oringnal:
>> rx pps: 90915 rx bitrate: 28.5 Gbits/s
>> tx pps: 54659 tx bitrate: 26.4 Gbits/s
>>
>> added trace:
>> rx pps: 76810 rx bitrate: 26.7 Gbits/s
>> tx pps: 54455 tx bitrate: 26.4 Gbits/s
>
>probably because you are reading a lot of cache cold
>data and copying it to stack just to discard it when
>trace is disabled?
>
>what if the tracepoints are much ligher weigh, just recording
>the vq number?
Recording only the vq index does offer some improvement, with RX PPS around
80,000,
but there's still a performance hurt compared to the original. Probably only in
DEBUG mode to use.
>
>
>> Based on the results of the above tests, adding tracing has a
>> performance hurt, especially on the rx side. Therefore, these
>> traces should only be added to the debug virtio_net.
>>
>> When these traces are needed, uncomment the DEBUG macro definition
>> in driver/net/virtio_net.c.
>> ---
>> drivers/net/virtio_net.c | 21 ++++-
>> drivers/net/virtio_net_trace.h | 124 ++++++++++++++++++++++++++
>> drivers/virtio/virtio_ring.c | 155 --------------------------------
>> include/linux/virtio_ring.h | 156 +++++++++++++++++++++++++++++++++
>> 4 files changed, 297 insertions(+), 159 deletions(-)
>> create mode 100644 drivers/net/virtio_net_trace.h
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index 22d894101c01..fabd3e8acb36 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -27,6 +27,11 @@
>> #include <net/netdev_queues.h>
>> #include <net/xdp_sock_drv.h>
>>
>> +#if defined(DEBUG)
>> +#define CREATE_TRACE_POINTS
>> +#include "virtio_net_trace.h"
>> +#endif
>> +
>> static int napi_weight = NAPI_POLL_WEIGHT;
>> module_param(napi_weight, int, 0444);
>>
>> @@ -795,7 +800,9 @@ static void skb_xmit_done(struct virtqueue *vq)
>> unsigned int index = vq2txq(vq);
>> struct send_queue *sq = &vi->sq[index];
>> struct napi_struct *napi = &sq->napi;
>> -
>> +#if defined(DEBUG)
>> + trace_skb_xmit_done(napi, vq);
>> +#endif
>> /* Suppress further interrupts. */
>> virtqueue_disable_cb(vq);
>>
>> @@ -2671,7 +2678,9 @@ static void receive_buf(struct virtnet_info *vi,
>> struct receive_queue *rq,
>>
>> if (unlikely(!skb))
>> return;
>> -
>> +#if defined(DEBUG)
>> + trace_receive_buf(rq->vq, skb);
>> +#endif
>> virtnet_receive_done(vi, rq, skb, flags);
>> }
>>
>> @@ -2877,7 +2886,9 @@ static void skb_recv_done(struct virtqueue *rvq)
>> {
>> struct virtnet_info *vi = rvq->vdev->priv;
>> struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
>> -
>> +#if defined(DEBUG)
>> + trace_skb_recv_done(&rq->napi, rvq);
>> +#endif
>> rq->calls++;
>> virtqueue_napi_schedule(&rq->napi, rvq);
>> }
>> @@ -3389,7 +3400,9 @@ static netdev_tx_t start_xmit(struct sk_buff *skb,
>> struct net_device *dev)
>>
>> /* timestamp packet in software */
>> skb_tx_timestamp(skb);
>> -
>> +#if defined(DEBUG)
>> + trace_start_xmit(sq->vq, skb);
>
>Pls prefix APIs with virtio_net or something.
how about trace_virtio_net_start_xmit ?
>
>> +#endif
>> /* Try to transmit */
>> err = xmit_skb(sq, skb, !use_napi);
>>
>
>> diff --git a/drivers/net/virtio_net_trace.h b/drivers/net/virtio_net_trace.h
>> new file mode 100644
>> index 000000000000..0a008cb8f51d
>> --- /dev/null
>> +++ b/drivers/net/virtio_net_trace.h
>> @@ -0,0 +1,124 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#if defined(DEBUG)
>> +
>> +#undef TRACE_SYSTEM
>> +#define TRACE_SYSTEM virtio_net
>> +
>> +#if !defined(_TRACE_VIRTIO_NET_H) || defined(TRACE_HEADER_MULTI_READ)
>> +#define _TRACE_VIRTIO_NET_H
>> +
>> +#include <linux/virtio.h>
>> +#include <linux/tracepoint.h>
>> +#include <linux/virtio_ring.h>
>> +
>> +DECLARE_EVENT_CLASS(virtio_net_rxtx_temp,
>> +
>> + TP_PROTO(struct virtqueue *vq, const struct sk_buff *skb),
>> +
>> + TP_ARGS(vq, skb),
>> +
>> + TP_STRUCT__entry(
>> + __string( name, vq->name )
>> + __field( unsigned int, num_free )
>> + __field( unsigned int, index )
>> + __field( bool, packed_ring )
>> + __field( bool, broken )
>> + __field( bool, event )
>> + __field( u16, last_used_idx )
>> + __field( __virtio16, avail_flags )
>> + __field( __virtio16, avail_idx )
>> + __field( __virtio16, used_idx )
>> + __field( const void *, skbaddr )
>> + ),
>> +
>> + TP_fast_assign(
>> + __entry->skbaddr = skb;
>> +
>> + __assign_str(name);
>> + __entry->num_free = vq->num_free;
>> + __entry->index = vq->index;
>> +
>> + struct vring_virtqueue *vvq;
>> +
>> + vvq = container_of_const(vq, struct vring_virtqueue, vq);
>> +
>> + __entry->packed_ring = vvq->packed_ring;
>> + __entry->broken = vvq->broken;
>> + __entry->event = vvq->event;
>> + if (!vvq->packed_ring) {
>> + __entry->last_used_idx = vvq->last_used_idx;
>> + __entry->avail_flags = vvq->split.vring.avail->flags;
>> + __entry->avail_idx = vvq->split.vring.avail->idx;
>> + __entry->used_idx = vvq->split.vring.used->idx;
>> + }
>> + ),
>> +
>> + TP_printk("skbaddr=%p vq=%s num_free=%u index=%u packed=%d broken=%d
>> event=%d last_used_idx=%u avail_flags=%u avail_idx=%u used_idx=%u",
>> + __entry->skbaddr, __get_str(name), __entry->num_free,
>> __entry->index,
>> + __entry->packed_ring, __entry->broken, __entry->event,
>> __entry->last_used_idx,
>> + __entry->avail_flags, __entry->avail_idx, __entry->used_idx)
>> +)
>> +
>> +DEFINE_EVENT(virtio_net_rxtx_temp, receive_buf,
>> +
>> + TP_PROTO(struct virtqueue *vq, const struct sk_buff *skb),
>> +
>> + TP_ARGS(vq, skb)
>> +);
>> +
>> +DEFINE_EVENT(virtio_net_rxtx_temp, start_xmit,
>> +
>> + TP_PROTO(struct virtqueue *vq, const struct sk_buff *skb),
>> +
>> + TP_ARGS(vq, skb)
>> +);
>> +
>> +DECLARE_EVENT_CLASS(virtio_net_int_temp,
>> +
>> + TP_PROTO(struct napi_struct *napi, struct virtqueue *vq),
>> +
>> + TP_ARGS(napi, vq),
>> +
>> + TP_STRUCT__entry(
>> + __string( dev_name, napi->dev->name )
>> + __string( vq_name, vq->name)
>> + __field( u32, napi_id )
>> + __field( int, weight )
>> + ),
>> +
>> + TP_fast_assign(
>> + __assign_str(dev_name);
>> + __assign_str(vq_name);
>> + __entry->napi_id = napi->napi_id;
>> + __entry->weight = napi->weight;
>> + ),
>> +
>> + TP_printk("dev=%s vq=%s napi_id=%u weight=%d",
>> + __get_str(dev_name), __get_str(vq_name), __entry->napi_id,
>> __entry->weight)
>> +)
>> +
>> +DEFINE_EVENT(virtio_net_int_temp, skb_xmit_done,
>> +
>> + TP_PROTO(struct napi_struct *napi, struct virtqueue *vq),
>> +
>> + TP_ARGS(napi, vq)
>> +);
>> +
>> +DEFINE_EVENT(virtio_net_int_temp, skb_recv_done,
>> +
>> + TP_PROTO(struct napi_struct *napi, struct virtqueue *vq),
>> +
>> + TP_ARGS(napi, vq)
>> +);
>> +
>> +#undef TRACE_INCLUDE_PATH
>> +#define TRACE_INCLUDE_PATH ../../drivers/net/
>> +#undef TRACE_INCLUDE_FILE
>> +#define TRACE_INCLUDE_FILE virtio_net_trace
>> +
>> +#endif /* _TRACE_VIRTIO_NET_H */
>> +
>> +/* This part must be outside protection */
>> +#include <trace/define_trace.h>
>> +
>> +#endif
>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>> index ddab68959671..d413ebb42729 100644
>> --- a/drivers/virtio/virtio_ring.c
>> +++ b/drivers/virtio/virtio_ring.c
>> @@ -67,161 +67,6 @@
>> #define LAST_ADD_TIME_INVALID(vq)
>> #endif
>>
>> -struct vring_desc_state_split {
>> - void *data; /* Data for callback. */
>> -
>> - /* Indirect desc table and extra table, if any. These two will be
>> - * allocated together. So we won't stress more to the memory allocator.
>> - */
>> - struct vring_desc *indir_desc;
>> -};
>> -
>> -struct vring_desc_state_packed {
>> - void *data; /* Data for callback. */
>> -
>> - /* Indirect desc table and extra table, if any. These two will be
>> - * allocated together. So we won't stress more to the memory allocator.
>> - */
>> - struct vring_packed_desc *indir_desc;
>> - u16 num; /* Descriptor list length. */
>> - u16 last; /* The last desc state in a list. */
>> -};
>> -
>> -struct vring_desc_extra {
>> - dma_addr_t addr; /* Descriptor DMA addr. */
>> - u32 len; /* Descriptor length. */
>> - u16 flags; /* Descriptor flags. */
>> - u16 next; /* The next desc state in a list. */
>> -};
>> -
>> -struct vring_virtqueue_split {
>> - /* Actual memory layout for this queue. */
>> - struct vring vring;
>> -
>> - /* Last written value to avail->flags */
>> - u16 avail_flags_shadow;
>> -
>> - /*
>> - * Last written value to avail->idx in
>> - * guest byte order.
>> - */
>> - u16 avail_idx_shadow;
>> -
>> - /* Per-descriptor state. */
>> - struct vring_desc_state_split *desc_state;
>> - struct vring_desc_extra *desc_extra;
>> -
>> - /* DMA address and size information */
>> - dma_addr_t queue_dma_addr;
>> - size_t queue_size_in_bytes;
>> -
>> - /*
>> - * The parameters for creating vrings are reserved for creating new
>> - * vring.
>> - */
>> - u32 vring_align;
>> - bool may_reduce_num;
>> -};
>> -
>> -struct vring_virtqueue_packed {
>> - /* Actual memory layout for this queue. */
>> - struct {
>> - unsigned int num;
>> - struct vring_packed_desc *desc;
>> - struct vring_packed_desc_event *driver;
>> - struct vring_packed_desc_event *device;
>> - } vring;
>> -
>> - /* Driver ring wrap counter. */
>> - bool avail_wrap_counter;
>> -
>> - /* Avail used flags. */
>> - u16 avail_used_flags;
>> -
>> - /* Index of the next avail descriptor. */
>> - u16 next_avail_idx;
>> -
>> - /*
>> - * Last written value to driver->flags in
>> - * guest byte order.
>> - */
>> - u16 event_flags_shadow;
>> -
>> - /* Per-descriptor state. */
>> - struct vring_desc_state_packed *desc_state;
>> - struct vring_desc_extra *desc_extra;
>> -
>> - /* DMA address and size information */
>> - dma_addr_t ring_dma_addr;
>> - dma_addr_t driver_event_dma_addr;
>> - dma_addr_t device_event_dma_addr;
>> - size_t ring_size_in_bytes;
>> - size_t event_size_in_bytes;
>> -};
>> -
>> -struct vring_virtqueue {
>> - struct virtqueue vq;
>> -
>> - /* Is this a packed ring? */
>> - bool packed_ring;
>> -
>> - /* Is DMA API used? */
>> - bool use_map_api;
>> -
>> - /* Can we use weak barriers? */
>> - bool weak_barriers;
>> -
>> - /* Other side has made a mess, don't try any more. */
>> - bool broken;
>> -
>> - /* Host supports indirect buffers */
>> - bool indirect;
>> -
>> - /* Host publishes avail event idx */
>> - bool event;
>> -
>> - /* Head of free buffer list. */
>> - unsigned int free_head;
>> - /* Number we've added since last sync. */
>> - unsigned int num_added;
>> -
>> - /* Last used index we've seen.
>> - * for split ring, it just contains last used index
>> - * for packed ring:
>> - * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index.
>> - * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap
>> counter.
>> - */
>> - u16 last_used_idx;
>> -
>> - /* Hint for event idx: already triggered no need to disable. */
>> - bool event_triggered;
>> -
>> - union {
>> - /* Available for split ring */
>> - struct vring_virtqueue_split split;
>> -
>> - /* Available for packed ring */
>> - struct vring_virtqueue_packed packed;
>> - };
>> -
>> - /* How to notify other side. FIXME: commonalize hcalls! */
>> - bool (*notify)(struct virtqueue *vq);
>> -
>> - /* DMA, allocation, and size information */
>> - bool we_own_ring;
>> -
>> - union virtio_map map;
>> -
>> -#ifdef DEBUG
>> - /* They're supposed to lock for us. */
>> - unsigned int in_use;
>> -
>> - /* Figure out if their kicks are too delayed. */
>> - bool last_add_time_valid;
>> - ktime_t last_add_time;
>> -#endif
>> -};
>> -
>> static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
>> static void vring_free(struct virtqueue *_vq);
>>
>> diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
>> index c97a12c1cda3..1442a02209ec 100644
>> --- a/include/linux/virtio_ring.h
>> +++ b/include/linux/virtio_ring.h
>> @@ -121,4 +121,160 @@ void vring_transport_features(struct virtio_device
>> *vdev);
>> irqreturn_t vring_interrupt(int irq, void *_vq);
>>
>> u32 vring_notification_data(struct virtqueue *_vq);
>> +
>> +struct vring_desc_state_split {
>> + void *data; /* Data for callback. */
>> +
>> + /* Indirect desc table and extra table, if any. These two will be
>> + * allocated together. So we won't stress more to the memory allocator.
>> + */
>> + struct vring_desc *indir_desc;
>> +};
>> +
>> +struct vring_desc_state_packed {
>> + void *data; /* Data for callback. */
>> +
>> + /* Indirect desc table and extra table, if any. These two will be
>> + * allocated together. So we won't stress more to the memory allocator.
>> + */
>> + struct vring_packed_desc *indir_desc;
>> + u16 num; /* Descriptor list length. */
>> + u16 last; /* The last desc state in a list. */
>> +};
>> +
>> +struct vring_desc_extra {
>> + dma_addr_t addr; /* Descriptor DMA addr. */
>> + u32 len; /* Descriptor length. */
>> + u16 flags; /* Descriptor flags. */
>> + u16 next; /* The next desc state in a list. */
>> +};
>> +
>> +struct vring_virtqueue_split {
>> + /* Actual memory layout for this queue. */
>> + struct vring vring;
>> +
>> + /* Last written value to avail->flags */
>> + u16 avail_flags_shadow;
>> +
>> + /*
>> + * Last written value to avail->idx in
>> + * guest byte order.
>> + */
>> + u16 avail_idx_shadow;
>> +
>> + /* Per-descriptor state. */
>> + struct vring_desc_state_split *desc_state;
>> + struct vring_desc_extra *desc_extra;
>> +
>> + /* DMA address and size information */
>> + dma_addr_t queue_dma_addr;
>> + size_t queue_size_in_bytes;
>> +
>> + /*
>> + * The parameters for creating vrings are reserved for creating new
>> + * vring.
>> + */
>> + u32 vring_align;
>> + bool may_reduce_num;
>> +};
>> +
>> +struct vring_virtqueue_packed {
>> + /* Actual memory layout for this queue. */
>> + struct {
>> + unsigned int num;
>> + struct vring_packed_desc *desc;
>> + struct vring_packed_desc_event *driver;
>> + struct vring_packed_desc_event *device;
>> + } vring;
>> +
>> + /* Driver ring wrap counter. */
>> + bool avail_wrap_counter;
>> +
>> + /* Avail used flags. */
>> + u16 avail_used_flags;
>> +
>> + /* Index of the next avail descriptor. */
>> + u16 next_avail_idx;
>> +
>> + /*
>> + * Last written value to driver->flags in
>> + * guest byte order.
>> + */
>> + u16 event_flags_shadow;
>> +
>> + /* Per-descriptor state. */
>> + struct vring_desc_state_packed *desc_state;
>> + struct vring_desc_extra *desc_extra;
>> +
>> + /* DMA address and size information */
>> + dma_addr_t ring_dma_addr;
>> + dma_addr_t driver_event_dma_addr;
>> + dma_addr_t device_event_dma_addr;
>> + size_t ring_size_in_bytes;
>> + size_t event_size_in_bytes;
>> +};
>> +
>> +struct vring_virtqueue {
>> + struct virtqueue vq;
>> +
>> + /* Is this a packed ring? */
>> + bool packed_ring;
>> +
>> + /* Is DMA API used? */
>> + bool use_map_api;
>> +
>> + /* Can we use weak barriers? */
>> + bool weak_barriers;
>> +
>> + /* Other side has made a mess, don't try any more. */
>> + bool broken;
>> +
>> + /* Host supports indirect buffers */
>> + bool indirect;
>> +
>> + /* Host publishes avail event idx */
>> + bool event;
>> +
>> + /* Head of free buffer list. */
>> + unsigned int free_head;
>> + /* Number we've added since last sync. */
>> + unsigned int num_added;
>> +
>> + /* Last used index we've seen.
>> + * for split ring, it just contains last used index
>> + * for packed ring:
>> + * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index.
>> + * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap
>> counter.
>> + */
>> + u16 last_used_idx;
>> +
>> + /* Hint for event idx: already triggered no need to disable. */
>> + bool event_triggered;
>> +
>> + union {
>> + /* Available for split ring */
>> + struct vring_virtqueue_split split;
>> +
>> + /* Available for packed ring */
>> + struct vring_virtqueue_packed packed;
>> + };
>> +
>> + /* How to notify other side. FIXME: commonalize hcalls! */
>> + bool (*notify)(struct virtqueue *vq);
>> +
>> + /* DMA, allocation, and size information */
>> + bool we_own_ring;
>> +
>> + union virtio_map map;
>> +
>> +#ifdef DEBUG
>> + /* They're supposed to lock for us. */
>> + unsigned int in_use;
>> +
>> + /* Figure out if their kicks are too delayed. */
>> + bool last_add_time_valid;
>> + ktime_t last_add_time;
>> +#endif
>> +};
>> +
>
>Not excited about exporting all this stuff from virtio-ring
>internals.
If the vring_virtqueue structure doesn't need to be tracked, this stuff doesn't
need to be exported.
>
>
>> #endif /* _LINUX_VIRTIO_RING_H */
>> --
>> 2.48.1