The feature supports two coalescing modes:
- Time-based: delay notifications up to N microseconds
- Count-based: delay until N packets are processed

Implementation details:
- Added VIRTIO_NET_CTRL_NOTF_COAL class handling in control virtqueue
- RX path: batches notifications based on packet count or timeout
- TX path: leverages the unified dispatcher to dynamically enable
  timer-based coalescing when the guest configures it via ethtool
- Coalescing parameters persist across live migration

Note: if tx=timer is specified at VM launch, the coalescing feature will
not be active, because the user has already introduced a delay of 150ms.

Signed-off-by: Koushik Dutta <[email protected]>
---
 hw/net/virtio-net.c            | 144 ++++++++++++++++++++++++++++++++-
 include/hw/virtio/virtio-net.h |   7 ++
 net/passt.c                    |   1 +
 net/tap.c                      |   1 +
 net/vhost-user.c               |   1 +
 net/vhost-vdpa.c               |   1 +
 6 files changed, 154 insertions(+), 1 deletion(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 319842cf28..3dbd2f4c11 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -157,6 +157,16 @@ static void flush_or_purge_queued_packets(NetClientState *nc)
  * - we could suppress RX interrupt if we were so inclined.
  */
 
+static void virtio_net_rx_notify(void *opaque)
+{
+    VirtIONetQueue *q = opaque; /* queue passed by the caller or rx_timer */
+    VirtIONet *n = q->n;
+    VirtIODevice *vdev = VIRTIO_DEVICE(n);
+
+    n->rx_pkt_cnt = 0; /* NOTE(review): device-wide count, per-queue timer */
+    virtio_notify(vdev, q->rx_vq); /* deliver the deferred RX notification */
+}
+
 static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
 {
     VirtIONet *n = VIRTIO_NET(vdev);
@@ -1004,6 +1014,62 @@ static void virtio_net_set_features(VirtIODevice *vdev,
 
 static void virtio_net_tx_timer(void *opaque);
 
+static int virtio_net_handle_coal(VirtIONet *n, uint8_t cmd,
+                                  struct iovec *iov, unsigned int iov_cnt)
+{
+    struct virtio_net_ctrl_coal coal;
+    VirtIONetQueue *q;
+    size_t s;
+    int i;
+
+    s = iov_to_buf(iov, iov_cnt, 0, &coal, sizeof(coal));
+    if (s != sizeof(coal)) { /* did not get a full coal struct */
+        return VIRTIO_NET_ERR;
+    }
+
+    if (cmd == VIRTIO_NET_CTRL_NOTF_COAL_RX_SET) {
+        n->rx_coal_usecs = le32_to_cpu(coal.max_usecs);
+        n->rx_coal_packets = le32_to_cpu(coal.max_packets);
+        for (i = 0; i < n->max_queue_pairs; i++) {
+            q = &n->vqs[i];
+            if (n->rx_coal_usecs > 0) {
+                if (!q->rx_timer) { /* create the per-queue timer once */
+                    q->rx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                               virtio_net_rx_notify,
+                                               q);
+                }
+            } else { /* rx usecs == 0: release the per-queue timer */
+                if (q->rx_timer) {
+                    timer_free(q->rx_timer);
+                    q->rx_timer = NULL;
+                }
+            }
+        }
+    } else if (cmd == VIRTIO_NET_CTRL_NOTF_COAL_TX_SET) {
+        n->tx_coal_usecs = le32_to_cpu(coal.max_usecs);
+        n->tx_coal_packets = le32_to_cpu(coal.max_packets);
+        /* Convert us to ns; NOTE(review): 32-bit multiply may wrap */
+        n->tx_timeout = n->tx_coal_usecs * 1000;
+        for (i = 0; i < n->max_queue_pairs; i++) {
+            q = &n->vqs[i];
+            if (n->tx_coal_usecs > 0) {
+                if (!q->tx_timer) { /* create the per-queue timer once */
+                    q->tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                               virtio_net_tx_timer,
+                                               q);
+                }
+            } else { /* tx usecs == 0: release the per-queue timer */
+                if (q->tx_timer) {
+                    timer_free(q->tx_timer);
+                    q->tx_timer = NULL;
+                }
+           }
+        }
+    }
+
+    return VIRTIO_NET_OK; /* NOTE(review): also returned for unknown cmd */
+}
+
 static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
 {
@@ -1583,6 +1649,8 @@ size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
         status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
     } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
         status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
+    } else if (ctrl.class == VIRTIO_NET_CTRL_NOTF_COAL) {
+        status = virtio_net_handle_coal(n, ctrl.cmd, iov, out_num);
     }
 
     s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
@@ -2042,7 +2110,23 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
     }
 
     virtqueue_flush(q->rx_vq, i);
-    virtio_notify(vdev, q->rx_vq);
+
+    /* rx coalescing */
+    n->rx_pkt_cnt += i;
+    if (n->rx_coal_usecs == 0 || n->rx_pkt_cnt >= n->rx_coal_packets) {
+        if (q->rx_timer) {
+            timer_del(q->rx_timer);
+        }
+        virtio_net_rx_notify(q);
+    } else {
+        if (q->rx_timer) {
+            if (!timer_pending(q->rx_timer)) {
+                timer_mod(q->rx_timer,
+                          qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+                          n->rx_coal_usecs * 1000);
+            }
+        }
+    }
 
     return size;
 
@@ -2900,6 +2984,12 @@ static void virtio_net_tx_timer(void *opaque)
     if (ret == -EBUSY || ret == -EINVAL) {
         return;
     }
+    if (n->tx_pkt_cnt < ret) {
+        n->tx_pkt_cnt = 0;
+    } else {
+        n->tx_pkt_cnt -= ret;
+    }
+
     /*
      * If we flush a full burst of packets, assume there are
      * more coming and immediately rearm
@@ -2919,6 +3009,7 @@ static void virtio_net_tx_timer(void *opaque)
     ret = virtio_net_flush_tx(q);
     if (ret > 0) {
         virtio_queue_set_notification(q->tx_vq, 0);
+        n->tx_pkt_cnt -= ret;
         q->tx_waiting = 1;
         timer_mod(q->tx_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
@@ -2985,6 +3076,20 @@ static void virtio_net_handle_tx_dispatch(VirtIODevice *vdev, VirtQueue *vq)
                                        virtio_net_tx_timer, q);
         }
         virtio_net_handle_tx_timer(vdev, vq);
+    } else if (n->tx_coal_usecs > 0) {
+        n->tx_pkt_cnt++;
+        if (n->tx_pkt_cnt < n->tx_coal_packets) {
+            if (q->tx_timer) {
+                virtio_net_handle_tx_timer(vdev, vq);
+                return;
+            }
+        } else {
+            n->tx_pkt_cnt = 0;
+            if (q->tx_timer) {
+                timer_del(q->tx_timer);
+            }
+            virtio_net_handle_tx_bh(vdev, vq);
+        }
     } else {
         virtio_net_handle_tx_bh(vdev, vq);
     }
@@ -3098,6 +3203,9 @@ static void virtio_net_get_features(VirtIODevice *vdev, uint64_t *features,
     virtio_features_or(features, features, n->host_features_ex);
 
     virtio_add_feature_ex(features, VIRTIO_NET_F_MAC);
+    if (n->tx_timer_activate) {
+        virtio_clear_feature_ex(features, VIRTIO_NET_F_NOTF_COAL);
+    }
 
     if (!peer_has_vnet_hdr(n)) {
         virtio_clear_feature_ex(features, VIRTIO_NET_F_CSUM);
@@ -3252,6 +3360,29 @@ static int virtio_net_post_load_device(void *opaque, int version_id)
     }
 
     virtio_net_commit_rss_config(n);
+    if (n->tx_coal_usecs > 0 || n->rx_coal_usecs > 0) {
+
+        for (i = 0; i < n->max_queue_pairs; i++) {
+            VirtIONetQueue *q = &n->vqs[i];
+            if (n->rx_coal_usecs > 0) {
+                if (!q->rx_timer) {
+                    q->rx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                               virtio_net_rx_notify,
+                                               q);
+                }
+            }
+
+            if (n->tx_coal_usecs > 0) {
+                n->tx_timeout = n->tx_coal_usecs * 1000;
+                if (!q->tx_timer) {
+                    q->tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+                                               virtio_net_tx_timer,
+                                               q);
+                }
+            }
+        }
+    }
+
     return 0;
 }
 
@@ -3627,6 +3758,10 @@ static const VMStateDescription vmstate_virtio_net_device = {
                          vmstate_virtio_net_tx_waiting),
         VMSTATE_UINT64_TEST(curr_guest_offloads, VirtIONet,
                             has_ctrl_guest_offloads),
+        VMSTATE_UINT32(rx_coal_usecs, VirtIONet),
+        VMSTATE_UINT32(tx_coal_usecs, VirtIONet),
+        VMSTATE_UINT32(rx_coal_packets, VirtIONet),
+        VMSTATE_UINT32(tx_coal_packets, VirtIONet),
         VMSTATE_END_OF_LIST()
     },
     .subsections = (const VMStateDescription * const []) {
@@ -4060,6 +4195,11 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
             n->rss_data.specified_hash_types.on_bits |
             n->rss_data.specified_hash_types.auto_bits;
     }
+    n->rx_pkt_cnt = 0;
+    n->tx_pkt_cnt = 0;
+    n->rx_coal_usecs = 0;
+    n->rx_coal_packets = 0;
+    n->tx_coal_packets = 0;
 }
 
 static void virtio_net_device_unrealize(DeviceState *dev)
@@ -4272,6 +4412,8 @@ static const Property virtio_net_properties[] = {
                       VIRTIO_NET_F_GUEST_USO6, true),
     DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
                       VIRTIO_NET_F_HOST_USO, true),
+    DEFINE_PROP_BIT64("vq_notf_coal", VirtIONet, host_features,
+                      VIRTIO_NET_F_NOTF_COAL, true),
     DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv4", VirtIONet,
                                   rss_data.specified_hash_types,
                                   VIRTIO_NET_HASH_REPORT_IPv4 - 1,
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index a4eb3f407e..9612416622 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -159,6 +159,7 @@ typedef struct VirtIONetQueue {
     VirtQueue *rx_vq;
     VirtQueue *tx_vq;
     QEMUTimer *tx_timer;
+    QEMUTimer *rx_timer;
     QEMUBH *tx_bh;
     uint32_t tx_waiting;
     struct {
@@ -230,6 +231,12 @@ struct VirtIONet {
     struct EBPFRSSContext ebpf_rss;
     uint32_t nr_ebpf_rss_fds;
     char **ebpf_rss_fds;
+    uint32_t rx_coal_usecs; /* RX interrupt coalescing timeout (microseconds) */
+    uint32_t rx_coal_packets; /* RX packet count threshold for coalescing */
+    uint32_t rx_pkt_cnt; /* Current RX packet count since last notification */
+    uint32_t tx_coal_usecs; /* TX interrupt coalescing timeout (microseconds) */
+    uint32_t tx_coal_packets; /* TX packet count threshold for coalescing */
+    uint32_t tx_pkt_cnt; /* Current TX packet count since last notification */
     bool tx_timer_activate;
 };
 
diff --git a/net/passt.c b/net/passt.c
index 45440c399b..43b36ed8c5 100644
--- a/net/passt.c
+++ b/net/passt.c
@@ -52,6 +52,7 @@ static const int user_feature_bits[] = {
     VIRTIO_NET_F_GUEST_USO4,
     VIRTIO_NET_F_GUEST_USO6,
     VIRTIO_NET_F_HOST_USO,
+    VIRTIO_NET_F_NOTF_COAL,
 
     /* This bit implies RARP isn't sent by QEMU out of band */
     VIRTIO_NET_F_GUEST_ANNOUNCE,
diff --git a/net/tap.c b/net/tap.c
index 57ffb09885..e6ddbc1eb1 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -63,6 +63,7 @@ static const int kernel_feature_bits[] = {
     VIRTIO_F_NOTIFICATION_DATA,
     VIRTIO_NET_F_RSC_EXT,
     VIRTIO_NET_F_HASH_REPORT,
+    VIRTIO_NET_F_NOTF_COAL,
     VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO,
     VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO,
     VHOST_INVALID_FEATURE_BIT
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 2d0fc49b4d..f1e9b7a038 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -54,6 +54,7 @@ static const int user_feature_bits[] = {
     VIRTIO_NET_F_GUEST_USO4,
     VIRTIO_NET_F_GUEST_USO6,
     VIRTIO_NET_F_HOST_USO,
+    VIRTIO_NET_F_NOTF_COAL,
 
     /* This bit implies RARP isn't sent by QEMU out of band */
     VIRTIO_NET_F_GUEST_ANNOUNCE,
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index f1523697e2..0dcd6fb9f1 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -70,6 +70,7 @@ static const int vdpa_feature_bits[] = {
     VIRTIO_NET_F_CTRL_RX,
     VIRTIO_NET_F_CTRL_RX_EXTRA,
     VIRTIO_NET_F_CTRL_VLAN,
+    VIRTIO_NET_F_NOTF_COAL,
     VIRTIO_NET_F_CTRL_VQ,
     VIRTIO_NET_F_GSO,
     VIRTIO_NET_F_GUEST_CSUM,
-- 
2.53.0


Reply via email to