On Wed, Jun 5, 2024 at 7:51 PM Heng Qi <hen...@linux.alibaba.com> wrote:
>
> On Wed, 5 Jun 2024 13:30:51 +0200, Jiri Pirko <j...@resnulli.us> wrote:
> > Mon, May 20, 2024 at 02:48:15PM CEST, j...@resnulli.us wrote:
> > >Fri, May 10, 2024 at 09:11:16AM CEST, hen...@linux.alibaba.com wrote:
> > >>On Thu,  9 May 2024 13:46:15 +0200, Jiri Pirko <j...@resnulli.us> wrote:
> > >>> From: Jiri Pirko <j...@nvidia.com>
> > >>>
> > >>> Add support for Byte Queue Limits (BQL).
> > >>
> > >>Historically, both Jason and Michael have attempted to support BQL
> > >>for virtio-net, for example:
> > >>
> > >>https://lore.kernel.org/netdev/21384cb5-99a6-7431-1039-b356521e1...@redhat.com/
> > >>
> > >>Those discussions focused primarily on:
> > >>
> > >>1. BQL is based on napi tx, so reporting the completion statistics
> > >>depends on the use_napi check. When napi mode is switched to orphan,
> > >>some completion statistics are lost, making BQL temporarily inaccurate.
> > >>
> > >>2. If tx dim is supported, orphan mode may be removed and interrupt-driven
> > >>tx completion becomes more reasonable. That provides good support for BQL.
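
To make point 1 above concrete, here is a minimal sketch of how the BQL
producer/consumer hooks are expected to pair up in a driver (only the
netdev_tx_* helpers are real; the example_* names are made up for
illustration and are not from this patch):

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Producer side, called from ndo_start_xmit: account the bytes that
 * were just handed to the device.
 */
static void example_tx_account(struct netdev_queue *txq, struct sk_buff *skb)
{
	netdev_tx_sent_queue(txq, skb->len);
}

/* Consumer side, called from the tx completion path (the napi poll
 * handler for virtio-net): report what actually completed so BQL can
 * shrink its in-flight byte count and re-tune the queue limit.
 */
static void example_tx_complete(struct netdev_queue *txq,
				unsigned int pkts, unsigned int bytes)
{
	netdev_tx_completed_queue(txq, pkts, bytes);
}

If skbs are instead freed via the orphan path, the consumer call never
happens for those bytes, the in-flight accounting drifts, and BQL can
wrongly stall the queue - which is the inaccuracy described above.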
> > >
> > >But when the device does not support dim, the orphan mode is still
> > >needed, isn't it?
> >
> > Heng, is my assumption correct here? Thanks!
> >
>
> Maybe. According to our cloud data, napi_tx=on works better than orphan mode
> in most scenarios. Although orphan mode performs better in specific
> benchmarks,

For example pktgen (I meant that even though orphan mode can break pktgen,
pktgen can still finish once a new packet needs to be sent after pktgen has
completed, since that later transmit reaps the pending completions).

> the performance of napi_tx can be enhanced through tx dim. Then there is no
> reason not to support dim for devices that want the best performance.

Ideally, if we could drop orphan mode, everything would be simplified.

>
> Back to this patch set: since BQL is a feature that affects performance,
> it deserves more comprehensive test data.

Thanks

>
> Thanks.
>
> > >
> > >>
> > >>Thanks.
> > >>
> > >>>
> > >>> Signed-off-by: Jiri Pirko <j...@nvidia.com>
> > >>> ---
> > >>>  drivers/net/virtio_net.c | 33 ++++++++++++++++++++-------------
> > >>>  1 file changed, 20 insertions(+), 13 deletions(-)
> > >>>
> > >>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > >>> index 218a446c4c27..c53d6dc6d332 100644
> > >>> --- a/drivers/net/virtio_net.c
> > >>> +++ b/drivers/net/virtio_net.c
> > >>> @@ -84,7 +84,9 @@ struct virtnet_stat_desc {
> > >>>
> > >>>  struct virtnet_sq_free_stats {
> > >>>   u64 packets;
> > >>> + u64 xdp_packets;
> > >>>   u64 bytes;
> > >>> + u64 xdp_bytes;
> > >>>  };
> > >>>
> > >>>  struct virtnet_sq_stats {
> > >>> @@ -512,19 +514,19 @@ static void __free_old_xmit(struct send_queue *sq, bool in_napi,
> > >>>   void *ptr;
> > >>>
> > >>>   while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> > >>> -         ++stats->packets;
> > >>> -
> > >>>           if (!is_xdp_frame(ptr)) {
> > >>>                   struct sk_buff *skb = ptr;
> > >>>
> > >>>                   pr_debug("Sent skb %p\n", skb);
> > >>>
> > >>> +                 stats->packets++;
> > >>>                   stats->bytes += skb->len;
> > >>>                   napi_consume_skb(skb, in_napi);
> > >>>           } else {
> > >>>                   struct xdp_frame *frame = ptr_to_xdp(ptr);
> > >>>
> > >>> -                 stats->bytes += xdp_get_frame_len(frame);
> > >>> +                 stats->xdp_packets++;
> > >>> +                 stats->xdp_bytes += xdp_get_frame_len(frame);
> > >>>                   xdp_return_frame(frame);
> > >>>           }
> > >>>   }
> > >>> @@ -965,7 +967,8 @@ static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
> > >>>   virtnet_rq_free_buf(vi, rq, buf);
> > >>>  }
> > >>>
> > >>> -static void free_old_xmit(struct send_queue *sq, bool in_napi)
> > >>> +static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
> > >>> +                   bool in_napi)
> > >>>  {
> > >>>   struct virtnet_sq_free_stats stats = {0};
> > >>>
> > >>> @@ -974,9 +977,11 @@ static void free_old_xmit(struct send_queue *sq, bool in_napi)
> > >>>   /* Avoid overhead when no packets have been processed
> > >>>    * happens when called speculatively from start_xmit.
> > >>>    */
> > >>> - if (!stats.packets)
> > >>> + if (!stats.packets && !stats.xdp_packets)
> > >>>           return;
> > >>>
> > >>> + netdev_tx_completed_queue(txq, stats.packets, stats.bytes);
> > >>> +
> > >>>   u64_stats_update_begin(&sq->stats.syncp);
> > >>>   u64_stats_add(&sq->stats.bytes, stats.bytes);
> > >>>   u64_stats_add(&sq->stats.packets, stats.packets);
> > >>> @@ -1013,13 +1018,15 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
> > >>>    * early means 16 slots are typically wasted.
> > >>>    */
> > >>>   if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
> > >>> -         netif_stop_subqueue(dev, qnum);
> > >>> +         struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
> > >>> +
> > >>> +         netif_tx_stop_queue(txq);
> > >>>           if (use_napi) {
> > >>>                   if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
> > >>>                           virtqueue_napi_schedule(&sq->napi, sq->vq);
> > >>>           } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
> > >>>                   /* More just got used, free them then recheck. */
> > >>> -                 free_old_xmit(sq, false);
> > >>> +                 free_old_xmit(sq, txq, false);
> > >>>                   if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
> > >>>                           netif_start_subqueue(dev, qnum);
> > >>>                           virtqueue_disable_cb(sq->vq);
> > >>> @@ -2319,7 +2326,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
> > >>>
> > >>>           do {
> > >>>                   virtqueue_disable_cb(sq->vq);
> > >>> -                 free_old_xmit(sq, true);
> > >>> +                 free_old_xmit(sq, txq, true);
> > >>>           } while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
> > >>>
> > >>>           if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
> > >>> @@ -2471,7 +2478,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> > >>>   txq = netdev_get_tx_queue(vi->dev, index);
> > >>>   __netif_tx_lock(txq, raw_smp_processor_id());
> > >>>   virtqueue_disable_cb(sq->vq);
> > >>> - free_old_xmit(sq, true);
> > >>> + free_old_xmit(sq, txq, true);
> > >>>
> > >>>   if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
> > >>>           netif_tx_wake_queue(txq);
> > >>> @@ -2553,7 +2560,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
> > >>>   struct send_queue *sq = &vi->sq[qnum];
> > >>>   int err;
> > >>>   struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
> > >>> - bool kick = !netdev_xmit_more();
> > >>> + bool xmit_more = netdev_xmit_more();
> > >>>   bool use_napi = sq->napi.weight;
> > >>>
> > >>>   /* Free up any pending old buffers before queueing new ones. */
> > >>> @@ -2561,9 +2568,9 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
> > >>>           if (use_napi)
> > >>>                   virtqueue_disable_cb(sq->vq);
> > >>>
> > >>> -         free_old_xmit(sq, false);
> > >>> +         free_old_xmit(sq, txq, false);
> > >>>
> > >>> - } while (use_napi && kick &&
> > >>> + } while (use_napi && !xmit_more &&
> > >>>          unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
> > >>>
> > >>>   /* timestamp packet in software */
> > >>> @@ -2592,7 +2599,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
> > >>>
> > >>>   check_sq_full_and_disable(vi, dev, sq);
> > >>>
> > >>> - if (kick || netif_xmit_stopped(txq)) {
> > >>> + if (__netdev_tx_sent_queue(txq, skb->len, xmit_more)) {
> > >>>           if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
> > >>>                   u64_stats_update_begin(&sq->stats.syncp);
> > >>>                   u64_stats_inc(&sq->stats.kicks);
> > >>> --
> > >>> 2.44.0
> > >>>
> > >>>
>
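
One note on the last hunks, for readers less familiar with the BQL helpers:
the skb/xdp statistics are split because netdev_tx_completed_queue() must
only see skb bytes that the BQL producer hook accounted at transmit time;
XDP frames never pass through ndo_start_xmit accounting, so counting them
on completion would corrupt the byte-limit bookkeeping.

The kick decision now relies on __netdev_tx_sent_queue(), which folds the
producer accounting into the xmit_more handling. Roughly (a paraphrase of
the helper in include/linux/netdevice.h; check your tree for the exact
version):

static inline bool __netdev_tx_sent_queue(struct netdev_queue *dev_queue,
					  unsigned int bytes,
					  bool xmit_more)
{
	if (xmit_more) {
#ifdef CONFIG_BQL
		dql_queued(&dev_queue->dql, bytes);
#endif
		/* Defer the kick unless the stack already stopped the
		 * queue (e.g. the BQL limit was just hit).
		 */
		return netif_tx_queue_stopped(dev_queue);
	}
	/* Last packet of a burst: account it and always kick. */
	netdev_tx_sent_queue(dev_queue, bytes);
	return true;
}

So the driver kicks either at the end of a burst or as soon as the queue
gets stopped, which essentially preserves the old
"kick || netif_xmit_stopped(txq)" behavior while keeping the BQL counters
in sync.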

