Re: [PATCH net-next RFC 4/4] virtio-net: clean tx descriptors from rx napi

2017-03-06 Thread Willem de Bruijn
>> +static void virtnet_poll_cleantx(struct receive_queue *rq)
>> +{
>> +   struct virtnet_info *vi = rq->vq->vdev->priv;
>> +   unsigned int index = vq2rxq(rq->vq);
>> +   struct send_queue *sq = &vi->sq[index];
>> +   struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
>> +
>> +   __netif_tx_lock(txq, smp_processor_id());
>> +   free_old_xmit_skbs(sq, sq->napi.weight);
>> +   __netif_tx_unlock(txq);
>
>
> Should we check tx napi weight here? Or was this treated as an independent
> optimization?

Good point. This was not intended to run in no-napi mode as is.
With interrupts disabled most of the time in that mode, I don't
expect it to be worthwhile using in that case. I'll add the check
for sq->napi.weight != 0.
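
For concreteness, the guarded helper might look as follows (a sketch on
top of the RFC code above; the final patch may differ):

static void virtnet_poll_cleantx(struct receive_queue *rq)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	unsigned int index = vq2rxq(rq->vq);
	struct send_queue *sq = &vi->sq[index];
	struct netdev_queue *txq;

	/* Skip tx cleaning when tx napi is disabled (napi weight 0). */
	if (!sq->napi.weight)
		return;

	txq = netdev_get_tx_queue(vi->dev, index);
	__netif_tx_lock(txq, smp_processor_id());
	free_old_xmit_skbs(sq, sq->napi.weight);
	__netif_tx_unlock(txq);

	if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
		netif_wake_subqueue(vi->dev, vq2txq(sq->vq));
}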

>> +
>> +   if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
>> +   netif_wake_subqueue(vi->dev, vq2txq(sq->vq));
>> +}
>> +
>>   static int virtnet_poll(struct napi_struct *napi, int budget)
>>   {
>> struct receive_queue *rq =
>> @@ -1039,6 +1056,8 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
>> received = virtnet_receive(rq, budget);
>>   + virtnet_poll_cleantx(rq);
>> +
>
>
> Better to do this before virtnet_receive(), considering that refill may
> allocate memory for rx buffers.

Will do.
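
Concretely, that reorders virtnet_poll() along these lines (a sketch;
the surrounding code follows the RFC patch below):

static int virtnet_poll(struct napi_struct *napi, int budget)
{
	struct receive_queue *rq =
		container_of(napi, struct receive_queue, napi);
	unsigned int received;

	/* Clean tx first: skbs freed here can satisfy the rx buffer
	 * refill that virtnet_receive() may trigger. */
	virtnet_poll_cleantx(rq);

	received = virtnet_receive(rq, budget);

	/* Out of packets? */
	if (received < budget)
		virtqueue_napi_complete(napi, rq->vq, received);

	return received;
}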

> Btw, if this proves to be more efficient, in the future we may consider:
>
> 1) use a single interrupt for both rx and tx
> 2) use a single napi to handle both rx and tx

Agreed, I think that's sensible.
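
Purely as an illustration of (2), the tx completion callback could then
collapse into kicking the shared napi. This assumes a
virtqueue_napi_schedule() helper symmetric to the
virtqueue_napi_complete() used in the patch; the function name below is
hypothetical:

/* Hypothetical tx completion callback when tx work is folded into
 * the rx napi: stop tx interrupts and kick the shared instance. */
static void skb_xmit_done_shared(struct virtqueue *vq)
{
	struct virtnet_info *vi = vq->vdev->priv;
	struct receive_queue *rq = &vi->rq[vq2txq(vq)];

	virtqueue_disable_cb(vq);
	virtqueue_napi_schedule(&rq->napi, rq->vq);
}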


Re: [PATCH net-next RFC 4/4] virtio-net: clean tx descriptors from rx napi

2017-03-06 Thread Willem de Bruijn
On Mon, Mar 6, 2017 at 12:43 PM, Willem de Bruijn wrote:
>>> +static void virtnet_poll_cleantx(struct receive_queue *rq)
>>> +{
>>> +   struct virtnet_info *vi = rq->vq->vdev->priv;
>>> +   unsigned int index = vq2rxq(rq->vq);
>>> +   struct send_queue *sq = &vi->sq[index];
>>> +   struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
>>> +
>>> +   __netif_tx_lock(txq, smp_processor_id());
>>> +   free_old_xmit_skbs(sq, sq->napi.weight);
>>> +   __netif_tx_unlock(txq);
>>
>>
>> Should we check tx napi weight here? Or was this treated as an independent
>> optimization?
>
> Good point. This was not intended to run in no-napi mode as is.
> With interrupts disabled most of the time in that mode, I don't
> expect it to be worthwhile using in that case. I'll add the check
> for sq->napi.weight != 0.

I'm wrong here. Rx interrupts are not disabled, of course. It is
probably worth benchmarking, then.


Re: [PATCH net-next RFC 4/4] virtio-net: clean tx descriptors from rx napi

2017-03-06 Thread Jason Wang



On 2017-03-03 22:39, Willem de Bruijn wrote:

From: Willem de Bruijn 

Amortize the cost of virtual interrupts by doing both rx and tx work
on reception of a receive interrupt. Together with VIRTIO_F_EVENT_IDX and
vhost interrupt moderation, this suppresses most explicit tx
completion interrupts for bidirectional workloads.

Signed-off-by: Willem de Bruijn 
---
  drivers/net/virtio_net.c | 19 +++
  1 file changed, 19 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9a9031640179..21c575127d50 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1031,6 +1031,23 @@ static int virtnet_receive(struct receive_queue *rq, int budget)
return received;
  }
  
+static unsigned int free_old_xmit_skbs(struct send_queue *sq, int budget);
+
+static void virtnet_poll_cleantx(struct receive_queue *rq)
+{
+   struct virtnet_info *vi = rq->vq->vdev->priv;
+   unsigned int index = vq2rxq(rq->vq);
+   struct send_queue *sq = &vi->sq[index];
+   struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
+
+   __netif_tx_lock(txq, smp_processor_id());
+   free_old_xmit_skbs(sq, sq->napi.weight);
+   __netif_tx_unlock(txq);


Should we check tx napi weight here? Or was this treated as an
independent optimization?



+
+   if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
+   netif_wake_subqueue(vi->dev, vq2txq(sq->vq));
+}
+
  static int virtnet_poll(struct napi_struct *napi, int budget)
  {
struct receive_queue *rq =
@@ -1039,6 +1056,8 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
  
  	received = virtnet_receive(rq, budget);
  
+	virtnet_poll_cleantx(rq);
+

Better to do this before virtnet_receive(), considering that refill may
allocate memory for rx buffers.


Btw, if this proves to be more efficient, in the future we may
consider:

1) use a single interrupt for both rx and tx
2) use a single napi to handle both rx and tx

Thanks


	/* Out of packets? */
	if (received < budget)
		virtqueue_napi_complete(napi, rq->vq, received);




[PATCH net-next RFC 4/4] virtio-net: clean tx descriptors from rx napi

2017-03-03 Thread Willem de Bruijn
From: Willem de Bruijn 

Amortize the cost of virtual interrupts by doing both rx and tx work
on reception of a receive interrupt. Together with VIRTIO_F_EVENT_IDX and
vhost interrupt moderation, this suppresses most explicit tx
completion interrupts for bidirectional workloads.

Signed-off-by: Willem de Bruijn 
---
 drivers/net/virtio_net.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9a9031640179..21c575127d50 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1031,6 +1031,23 @@ static int virtnet_receive(struct receive_queue *rq, int budget)
return received;
 }
 
+static unsigned int free_old_xmit_skbs(struct send_queue *sq, int budget);
+
+static void virtnet_poll_cleantx(struct receive_queue *rq)
+{
+   struct virtnet_info *vi = rq->vq->vdev->priv;
+   unsigned int index = vq2rxq(rq->vq);
+   struct send_queue *sq = &vi->sq[index];
+   struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
+
+   __netif_tx_lock(txq, smp_processor_id());
+   free_old_xmit_skbs(sq, sq->napi.weight);
+   __netif_tx_unlock(txq);
+
+   if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
+   netif_wake_subqueue(vi->dev, vq2txq(sq->vq));
+}
+
 static int virtnet_poll(struct napi_struct *napi, int budget)
 {
struct receive_queue *rq =
@@ -1039,6 +1056,8 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
 
received = virtnet_receive(rq, budget);
 
+   virtnet_poll_cleantx(rq);
+
/* Out of packets? */
if (received < budget)
virtqueue_napi_complete(napi, rq->vq, received);
-- 
2.12.0.rc1.440.g5b76565f74-goog