Re: [PATCH v3 13/17] virtio_pci: queue_reset: support VIRTIO_F_RING_RESET

2022-02-08 Thread Jason Wang
On Tue, Feb 8, 2022 at 3:44 PM Xuan Zhuo  wrote:
>
> On Tue, 8 Feb 2022 10:55:37 +0800, Jason Wang  wrote:
> > On Mon, Feb 7, 2022 at 4:19 PM Xuan Zhuo  wrote:
> > >
> > > On Mon, 7 Feb 2022 14:57:13 +0800, Jason Wang  wrote:
> > > >
> > > > On 2022/1/26 3:35 PM, Xuan Zhuo wrote:
> > > > > This patch implements virtio pci support for QUEUE RESET.
> > > > >
> > > > > Performing reset on a queue is divided into two steps:
> > > > >
> > > > > 1. reset_vq: reset one vq
> > > > > 2. enable_reset_vq: re-enable the reset queue
> > > > >
> > > > > In the first step, these tasks will be completed:
> > > > > 1. notify the hardware queue to reset
> > > > > 2. recycle the buffer from vq
> > > > > 3. release the ring of the vq
> > > > >
> > > > > The process of enable reset vq:
> > > > >  vp_modern_enable_reset_vq()
> > > > >  vp_enable_reset_vq()
> > > > >  __vp_setup_vq()
> > > > >  setup_vq()
> > > > >  vring_setup_virtqueue()
> > > > >
> > > > > In this process, we added two parameters, vq and num, which are
> > > > > finally passed to vring_setup_virtqueue(). The original info and vq
> > > > > are reused.
> > > > >
> > > > > Signed-off-by: Xuan Zhuo 
> > > > > ---
> > > > >   drivers/virtio/virtio_pci_common.c |  36 +++
> > > > >   drivers/virtio/virtio_pci_common.h |   5 ++
> > > > >   drivers/virtio/virtio_pci_modern.c | 100 +
> > > > >   3 files changed, 128 insertions(+), 13 deletions(-)
> > > > >
> > > > > diff --git a/drivers/virtio/virtio_pci_common.c 
> > > > > b/drivers/virtio/virtio_pci_common.c
> > > > > index c02936d29a31..ad21638fbf66 100644
> > > > > --- a/drivers/virtio/virtio_pci_common.c
> > > > > +++ b/drivers/virtio/virtio_pci_common.c
> > > > > @@ -205,23 +205,28 @@ static int vp_request_msix_vectors(struct 
> > > > > virtio_device *vdev, int nvectors,
> > > > > return err;
> > > > >   }
> > > > >
> > > > > -static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, 
> > > > > unsigned index,
> > > > > -void (*callback)(struct virtqueue 
> > > > > *vq),
> > > > > -const char *name,
> > > > > -bool ctx,
> > > > > -u16 msix_vec, u16 num)
> > > > > +struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned 
> > > > > index,
> > > > > + void (*callback)(struct virtqueue *vq),
> > > > > + const char *name,
> > > > > + bool ctx,
> > > > > + u16 msix_vec, u16 num)
> > > > >   {
> > > > > struct virtio_pci_device *vp_dev = to_vp_device(vdev);
> > > > > -   struct virtio_pci_vq_info *info = kmalloc(sizeof *info, 
> > > > > GFP_KERNEL);
> > > > > +   struct virtio_pci_vq_info *info;
> > > > > struct virtqueue *vq;
> > > > > unsigned long flags;
> > > > >
> > > > > -   /* fill out our structure that represents an active queue */
> > > > > -   if (!info)
> > > > > -   return ERR_PTR(-ENOMEM);
> > > > > +   info = vp_dev->vqs[index];
> > > > > +   if (!info) {
> > > > > +   info = kzalloc(sizeof *info, GFP_KERNEL);
> > > > > +
> > > > > +   /* fill out our structure that represents an active queue 
> > > > > */
> > > > > +   if (!info)
> > > > > +   return ERR_PTR(-ENOMEM);
> > > > > +   }
> > > > >
> > > > > vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
> > > > > - msix_vec, NULL, num);
> > > > > + msix_vec, info->vq, num);
> > > > > if (IS_ERR(vq))
> > > > > goto out_info;
> > > > >
> > > > > @@ -238,6 +243,9 @@ static struct virtqueue *vp_setup_vq(struct 
> > > > > virtio_device *vdev, unsigned index,
> > > > > return vq;
> > > > >
> > > > >   out_info:
> > > > > +   if (info->vq && info->vq->reset)
> > > > > +   return vq;
> > > > > +
> > > > > kfree(info);
> > > > > return vq;
> > > > >   }
> > > > > @@ -248,9 +256,11 @@ static void vp_del_vq(struct virtqueue *vq)
> > > > > struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
> > > > > unsigned long flags;
> > > > >
> > > > > -   spin_lock_irqsave(&vp_dev->lock, flags);
> > > > > -   list_del(&info->node);
> > > > > -   spin_unlock_irqrestore(&vp_dev->lock, flags);
> > > > > +   if (!vq->reset) {
> > > > > +   spin_lock_irqsave(&vp_dev->lock, flags);
> > > > > +   list_del(&info->node);
> > > > > +   spin_unlock_irqrestore(&vp_dev->lock, flags);
> > > > > +   }
> > > > >
> > > > > vp_dev->del_vq(info);
> > > > > kfree(info);
> > > > > diff --git a/drivers/virtio/virtio_pci_common.h 
> > > > > b/drivers/virtio/virtio_pci_common.h
> > > > > index 65db92245e41..c1d15f7c0be4 100644
> > > > > --- a/drivers/virtio/virtio_pci_common.h
> > > > > +++ b/drivers/virtio/virtio_pci_common.h
> > > > > @@ -119,6 +119,11 @@ int vp_find_vqs(struct virtio_device *vdev, 
> > 
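
For illustration, the two-step flow described in the commit message above
might be driven from a driver roughly like this (a sketch with helper names
assumed from the quoted call chain and patch titles, not actual patch code):

/* Sketch only: reset one virtqueue, then bring it back. */
static int demo_reset_one_vq(struct virtqueue *vq)
{
        int err;

        /* Step 1: reset_vq -- the device stops the queue, the driver
         * recycles outstanding buffers and releases the ring. */
        err = virtio_reset_vq(vq);              /* assumed helper */
        if (err)
                return err;

        /* Step 2: enable_reset_vq -- re-enable the queue; the original
         * info and vq are reused, only the ring is set up again. */
        return virtio_enable_reset_vq(vq);      /* assumed helper */
}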

Re: [RFC PATCH 0/5] TUN/VirtioNet USO features support.

2022-02-08 Thread Jason Wang


On 2022/2/8 9:09 PM, Andrew Melnichenko wrote:

Hi people,
Can you please review this series?



Are there any performance numbers to demonstrate the difference?

Thanks




On Wed, Jan 26, 2022 at 10:32 AM Yuri Benditovich
 wrote:

On Wed, Jan 26, 2022 at 9:54 AM Xuan Zhuo  wrote:

On Tue, 25 Jan 2022 10:46:57 +0200, Andrew Melnychenko  
wrote:

Added new offloads for TUN devices TUN_F_USO4 and TUN_F_USO6.
Technically they enable NETIF_F_GSO_UDP_L4
(and only if USO4 & USO6 are set simultaneously).
It allows transmission of large UDP packets.

Distinct USO4 and USO6 features are required for qemu, where Windows guests
can enable/disable USO receive for IPv4 and IPv6 separately.
On the other side, Linux can't really differentiate USO4 and USO6 for now,
so enabling USO for TUN currently requires enabling USO4 and USO6 together.
In the future, there will be a mechanism to control UDP_L4 GSO separately.
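
As a rough illustration, a userspace holder of a TUN/TAP fd would turn the
new offloads on together, e.g. (sketch; error handling elided, flag names
from this series):

unsigned int offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
                       TUN_F_USO4 | TUN_F_USO6;

if (ioctl(tap_fd, TUNSETOFFLOAD, offload) < 0)
        perror("TUNSETOFFLOAD");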

Test it WIP Qemu https://github.com/daynix/qemu/tree/Dev_USOv2

New types for VirtioNet already on mailing:
https://lists.oasis-open.org/archives/virtio-comment/202110/msg00010.html

Seems like this hasn't been upvoted yet.

 https://github.com/oasis-tcs/virtio-spec#use-of-github-issues

Yes, correct. This is one reason why this series of patches is an RFC.


Thanks.


Also, there is a known issue with transmitting packets between two guests.
Without hacks to the skb's GSO settings, packets are still segmented in the
host's postrouting.

Andrew Melnychenko (5):
   uapi/linux/if_tun.h: Added new ioctl for tun/tap.
   driver/net/tun: Added features for USO.
   uapi/linux/virtio_net.h: Added USO types.
   linux/virtio_net.h: Added Support for GSO_UDP_L4 offload.
   drivers/net/virtio_net.c: Added USO support.

  drivers/net/tap.c   | 18 --
  drivers/net/tun.c   | 15 ++-
  drivers/net/virtio_net.c| 22 ++
  include/linux/virtio_net.h  | 11 +++
  include/uapi/linux/if_tun.h |  3 +++
  include/uapi/linux/virtio_net.h |  4 
  6 files changed, 66 insertions(+), 7 deletions(-)

--
2.34.1


Re: [PATCH v3 00/17] virtio pci support VIRTIO_F_RING_RESET

2022-02-08 Thread Jason Wang
On Tue, Feb 8, 2022 at 3:56 PM Xuan Zhuo  wrote:
>
> On Tue, 08 Feb 2022 11:14:45 +0800, Xuan Zhuo  
> wrote:
> > On Tue, 8 Feb 2022 10:59:48 +0800, Jason Wang  wrote:
> > >
> > > On 2022/2/7 2:02 PM, Xuan Zhuo wrote:
> > > > On Mon, 7 Feb 2022 11:39:36 +0800, Jason Wang  
> > > > wrote:
> > > >> On Wed, Jan 26, 2022 at 3:35 PM Xuan Zhuo  
> > > >> wrote:
> > > >>> 
> > > >>> The virtio spec already supports the virtio queue reset function. 
> > > >>> This patch set
> > > >>> is to add this function to the kernel. The relevant virtio spec 
> > > >>> information is
> > > >>> here:
> > > >>>
> > > >>>  https://github.com/oasis-tcs/virtio-spec/issues/124
> > > >>>
> > > >>> Also regarding MMIO support for queue reset, I plan to support it 
> > > >>> after this
> > > >>> patch is passed.
> > > >>>
> > > >>> #14-#17 is the disable/enable function of rx/tx pair implemented by 
> > > >>> virtio-net
> > > >>> using the new helper.
> > > >> One thing that came to mind is the steering. E.g if we disable an RX,
> > > >> should we stop steering packets to that queue?
>
> Regarding this spec, if multiple queues are disabled at the same time, it
> will be troublesome for the backend to select the queue,

I don't understand this; such a backend or device can simply claim not to
support this feature.

> so I want to
> directly define that only one queue is allowed to be reset at a time. Do you
> think this is appropriate?

This sounds very complicated. E.g. how can we define an API that can
reset more than one queue? (Currently PCI only supports MMIO access.)

>
> In terms of the backend's queue reselection, it would be more convenient
> to implement if we drop packets directly. Do you think we must implement
> this reselection function?

Rethinking this: it should be fine and much simpler.

Thanks

>
> Thanks.
>
> > > > Yes, we should reselect a queue.
> > > >
> > > > Thanks.
> > >
> > >
> > > Maybe a spec patch for that?
> >
> > Yes, I also realized this. Although virtio-net's disable/enable is
> > implemented based on queue reset, virtio-net still has to define its own
> > flag and some more detailed behavior.
> >
> > I'll think about it and post a spec patch.
> >
> > Thanks.
> >
> > >
> > > Thanks
> > >
> > >
> > > >
> > > >> Thanks
> > > >>
> > > >>> This function is not currently referenced by other
> > > >>> functions. It is more to show the usage of the new helper; I am not
> > > >>> sure if they are going to be merged together.
> > > >>>
> > > >>> Please review. Thanks.
> > > >>>
> > > >>> v3:
> > > >>>1. keep vq, irq unreleased
> > > >>>
> > > >>> Xuan Zhuo (17):
> > > >>>virtio_pci: struct virtio_pci_common_cfg add queue_notify_data
> > > >>>virtio: queue_reset: add VIRTIO_F_RING_RESET
> > > >>>virtio: queue_reset: struct virtio_config_ops add callbacks for
> > > >>>  queue_reset
> > > >>>virtio: queue_reset: add helper
> > > >>>virtio_ring: queue_reset: extract the release function of the vq 
> > > >>> ring
> > > >>>virtio_ring: queue_reset: split: add __vring_init_virtqueue()
> > > >>>virtio_ring: queue_reset: split: support enable reset queue
> > > >>>virtio_ring: queue_reset: packed: support enable reset queue
> > > >>>virtio_ring: queue_reset: add vring_reset_virtqueue()
> > > >>>virtio_pci: queue_reset: update struct virtio_pci_common_cfg and
> > > >>>  option functions
> > > >>>virtio_pci: queue_reset: release vq by vp_dev->vqs
> > > >>>virtio_pci: queue_reset: setup_vq use vring_setup_virtqueue()
> > > >>>virtio_pci: queue_reset: support VIRTIO_F_RING_RESET
> > > >>>virtio_net: virtnet_tx_timeout() fix style
> > > >>>virtio_net: virtnet_tx_timeout() stop ref sq->vq
> > > >>>virtio_net: split free_unused_bufs()
> > > >>>virtio_net: support pair disable/enable
> > > >>>
> > > >>>   drivers/net/virtio_net.c   | 220 
> > > >>> ++---
> > > >>>   drivers/virtio/virtio_pci_common.c |  62 ---
> > > >>>   drivers/virtio/virtio_pci_common.h |  11 +-
> > > >>>   drivers/virtio/virtio_pci_legacy.c |   5 +-
> > > >>>   drivers/virtio/virtio_pci_modern.c | 120 +-
> > > >>>   drivers/virtio/virtio_pci_modern_dev.c |  28 
> > > >>>   drivers/virtio/virtio_ring.c   | 144 +++-
> > > >>>   include/linux/virtio.h |   1 +
> > > >>>   include/linux/virtio_config.h  |  75 -
> > > >>>   include/linux/virtio_pci_modern.h  |   2 +
> > > >>>   include/linux/virtio_ring.h|  42 +++--
> > > >>>   include/uapi/linux/virtio_config.h |   7 +-
> > > >>>   include/uapi/linux/virtio_pci.h|   2 +
> > > >>>   13 files changed, 618 insertions(+), 101 deletions(-)
> > > >>>
> > > >>> --
> > > >>> 2.31.0
> > > >>>
> > >
> > 

Re: [RFC PATCH 5/5] drivers/net/virtio_net.c: Added USO support.

2022-02-08 Thread Jason Wang


On 2022/1/25 4:47 PM, Andrew Melnychenko wrote:

Now, it is possible to enable GSO_UDP_L4 ("tx-udp-segmentation") for VirtioNet.

Signed-off-by: Andrew Melnychenko 
---
  drivers/net/virtio_net.c | 22 ++
  1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index a801ea40908f..a45eee022be4 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -60,13 +60,17 @@ static const unsigned long guest_offloads[] = {
VIRTIO_NET_F_GUEST_TSO6,
VIRTIO_NET_F_GUEST_ECN,
VIRTIO_NET_F_GUEST_UFO,
-   VIRTIO_NET_F_GUEST_CSUM
+   VIRTIO_NET_F_GUEST_CSUM,
+   VIRTIO_NET_F_GUEST_USO4,
+   VIRTIO_NET_F_GUEST_USO6
  };
  
  #define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \

(1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
(1ULL << VIRTIO_NET_F_GUEST_ECN)  | \
-   (1ULL << VIRTIO_NET_F_GUEST_UFO))
+   (1ULL << VIRTIO_NET_F_GUEST_UFO)  | \
+   (1ULL << VIRTIO_NET_F_GUEST_USO4) | \
+   (1ULL << VIRTIO_NET_F_GUEST_USO6))
  
  struct virtnet_stat_desc {

char desc[ETH_GSTRING_LEN];
@@ -2530,7 +2534,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct 
bpf_prog *prog,
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
-   virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
+   virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM) ||
+   virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO4) ||
+   virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_USO6))) {
NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing 
GRO_HW/CSUM, disable GRO_HW/CSUM first");
return -EOPNOTSUPP;
}
@@ -3155,6 +3161,8 @@ static int virtnet_probe(struct virtio_device *vdev)
dev->hw_features |= NETIF_F_TSO6;
if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
dev->hw_features |= NETIF_F_TSO_ECN;
+   if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO))
+   dev->hw_features |= NETIF_F_GSO_UDP_L4;
  
  		dev->features |= NETIF_F_GSO_ROBUST;
  
@@ -3169,6 +3177,9 @@ static int virtnet_probe(struct virtio_device *vdev)

dev->features |= NETIF_F_GRO_HW;
if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))
dev->hw_features |= NETIF_F_GRO_HW;
+   if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_USO4) ||
+   virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_USO6))
+   dev->hw_features |= NETIF_F_LRO;



I think we need to use GRO_HW, see dbcf24d153884 ("virtio-net: use
NETIF_F_GRO_HW instead of NETIF_F_LRO").


Thanks


  
  	dev->vlan_features = dev->features;
  
@@ -3200,7 +3211,9 @@ static int virtnet_probe(struct virtio_device *vdev)

if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) ||
-   virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO))
+   virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO) ||
+   virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_USO4) ||
+   virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_USO6))
vi->big_packets = true;
  
  	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))

@@ -3400,6 +3413,7 @@ static struct virtio_device_id id_table[] = {
VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, 
VIRTIO_NET_F_GUEST_TSO6, \
VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
+   VIRTIO_NET_F_HOST_USO, VIRTIO_NET_F_GUEST_USO4, 
VIRTIO_NET_F_GUEST_USO6, \
VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \



Re: [RFC PATCH 4/5] linux/virtio_net.h: Added Support for GSO_UDP_L4 offload.

2022-02-08 Thread Jason Wang


On 2022/1/25 4:47 PM, Andrew Melnychenko wrote:

Now, it's possible to convert vnet packets from/to skb.



I suggest changing the title to "net: support XXX offload in vnet header"

Thanks




Signed-off-by: Andrew Melnychenko 
---
  include/linux/virtio_net.h | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index a960de68ac69..9311d41d0a81 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -17,6 +17,9 @@ static inline bool virtio_net_hdr_match_proto(__be16 
protocol, __u8 gso_type)
case VIRTIO_NET_HDR_GSO_UDP:
return protocol == cpu_to_be16(ETH_P_IP) ||
   protocol == cpu_to_be16(ETH_P_IPV6);
+   case VIRTIO_NET_HDR_GSO_UDP_L4:
+   return protocol == cpu_to_be16(ETH_P_IP) ||
+  protocol == cpu_to_be16(ETH_P_IPV6);
default:
return false;
}
@@ -31,6 +34,7 @@ static inline int virtio_net_hdr_set_proto(struct sk_buff 
*skb,
switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
case VIRTIO_NET_HDR_GSO_TCPV4:
case VIRTIO_NET_HDR_GSO_UDP:
+   case VIRTIO_NET_HDR_GSO_UDP_L4:
skb->protocol = cpu_to_be16(ETH_P_IP);
break;
case VIRTIO_NET_HDR_GSO_TCPV6:
@@ -69,6 +73,11 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
ip_proto = IPPROTO_UDP;
thlen = sizeof(struct udphdr);
break;
+   case VIRTIO_NET_HDR_GSO_UDP_L4:
+   gso_type = SKB_GSO_UDP_L4;
+   ip_proto = IPPROTO_UDP;
+   thlen = sizeof(struct udphdr);
+   break;
default:
return -EINVAL;
}
@@ -182,6 +191,8 @@ static inline int virtio_net_hdr_from_skb(const struct 
sk_buff *skb,
hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
else if (sinfo->gso_type & SKB_GSO_TCPV6)
hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+   else if (sinfo->gso_type & SKB_GSO_UDP_L4)
+   hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP_L4;
else
return -EINVAL;
if (sinfo->gso_type & SKB_GSO_TCP_ECN)
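
As a hedged illustration of what this enables end to end (a sketch, not code
from the patch; assumes a TX skb already marked SKB_GSO_UDP_L4 and a
little-endian vnet header):

/* Sketch: round-trip of a UDP GSO skb through the vnet header. */
static int demo_fill_uso_hdr(const struct sk_buff *skb,
                             struct virtio_net_hdr *hdr)
{
        /* With this patch, SKB_GSO_UDP_L4 maps to
         * VIRTIO_NET_HDR_GSO_UDP_L4 ... */
        if (virtio_net_hdr_from_skb(skb, hdr, true /* little endian */,
                                    false, 0))
                return -EINVAL; /* GSO type not expressible in the header */

        /* ... and the receive side's virtio_net_hdr_to_skb() maps
         * VIRTIO_NET_HDR_GSO_UDP_L4 back to SKB_GSO_UDP_L4 with
         * IPPROTO_UDP and a udphdr-sized transport header. */
        return 0;
}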



Re: [RFC PATCH 3/5] uapi/linux/virtio_net.h: Added USO types.

2022-02-08 Thread Jason Wang


On 2022/1/25 4:47 PM, Andrew Melnychenko wrote:

Added a new GSO type for USO: VIRTIO_NET_HDR_GSO_UDP_L4.
The VIRTIO_NET_F_HOST_USO feature allows enabling NETIF_F_GSO_UDP_L4.
Separate VIRTIO_NET_F_GUEST_USO4 & VIRTIO_NET_F_GUEST_USO6 features are
required for Windows guests.

Signed-off-by: Andrew Melnychenko 
---
  include/uapi/linux/virtio_net.h | 4 
  1 file changed, 4 insertions(+)

diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index 3f55a4215f11..620addc5767b 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -56,6 +56,9 @@
  #define VIRTIO_NET_F_MQ   22  /* Device supports Receive Flow
 * Steering */
  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */
+#define VIRTIO_NET_F_GUEST_USO4 54 /* Guest can handle USOv4 in. */
+#define VIRTIO_NET_F_GUEST_USO6 55 /* Guest can handle USOv6 in. */
+#define VIRTIO_NET_F_HOST_USO  56  /* Host can handle USO in. */



I think it's better to be consistent here: either we split it for both guest
and host, or we don't.


Thanks


  
  #define VIRTIO_NET_F_HASH_REPORT  57	/* Supports hash report */

  #define VIRTIO_NET_F_RSS60/* Supports RSS RX steering */
@@ -130,6 +133,7 @@ struct virtio_net_hdr_v1 {
  #define VIRTIO_NET_HDR_GSO_TCPV4  1   /* GSO frame, IPv4 TCP (TSO) */
  #define VIRTIO_NET_HDR_GSO_UDP    3   /* GSO frame, IPv4 UDP (UFO) */
  #define VIRTIO_NET_HDR_GSO_TCPV6  4   /* GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_UDP_L4  5   /* GSO frame, IPv4 & IPv6 UDP (USO) */
  #define VIRTIO_NET_HDR_GSO_ECN    0x80 /* TCP has ECN set */
__u8 gso_type;
__virtio16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */



Re: [RFC PATCH 2/5] driver/net/tun: Added features for USO.

2022-02-08 Thread Jason Wang


On 2022/1/25 4:46 PM, Andrew Melnychenko wrote:

Added support for USO4 and USO6; also added code for the new ioctl
TUNGETSUPPORTEDOFFLOADS.
For now, to "enable" USO, it is required to set both USO4 and USO6
simultaneously.
USO enables NETIF_F_GSO_UDP_L4.

Signed-off-by: Andrew Melnychenko 
---
  drivers/net/tap.c | 18 --
  drivers/net/tun.c | 15 ++-
  2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 8e3a28ba6b28..82d742ba78b1 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -940,6 +940,10 @@ static int set_offload(struct tap_queue *q, unsigned long 
arg)
if (arg & TUN_F_TSO6)
feature_mask |= NETIF_F_TSO6;
}
+
+   /* TODO: for now USO4 and USO6 should work simultaneously */
+   if (arg & (TUN_F_USO4 | TUN_F_USO6) == (TUN_F_USO4 | TUN_F_USO6))
+   features |= NETIF_F_GSO_UDP_L4;



If the kernel doesn't want to split the GSO_UDP features, I wonder how much
value there is in keeping separate features for TUN and virtio.


Thanks



}
  
  	/* tun/tap driver inverts the usage for TSO offloads, where

@@ -950,7 +954,8 @@ static int set_offload(struct tap_queue *q, unsigned long 
arg)
 * When user space turns off TSO, we turn off GSO/LRO so that
 * user-space will not receive TSO frames.
 */
-   if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6))
+   if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6) ||
+   feature_mask & (TUN_F_USO4 | TUN_F_USO6) == (TUN_F_USO4 | TUN_F_USO6))
features |= RX_OFFLOADS;
else
features &= ~RX_OFFLOADS;
@@ -979,6 +984,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
unsigned short u;
int __user *sp = argp;
struct sockaddr sa;
+   unsigned int supported_offloads;
int s;
int ret;
  
@@ -1074,7 +1080,8 @@ static long tap_ioctl(struct file *file, unsigned int cmd,

case TUNSETOFFLOAD:
/* let the user check for future flags */
if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
-   TUN_F_TSO_ECN | TUN_F_UFO))
+   TUN_F_TSO_ECN | TUN_F_UFO |
+   TUN_F_USO4 | TUN_F_USO6))
return -EINVAL;
  
  		rtnl_lock();

@@ -1082,6 +1089,13 @@ static long tap_ioctl(struct file *file, unsigned int 
cmd,
rtnl_unlock();
return ret;
  
+	case TUNGETSUPPORTEDOFFLOADS:

+   supported_offloads = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
+   TUN_F_TSO_ECN | TUN_F_UFO | TUN_F_USO4 | TUN_F_USO6;
+   if (copy_to_user(sp, &supported_offloads, sizeof(supported_offloads)))
+   return -EFAULT;
+   return 0;
+
case SIOCGIFHWADDR:
rtnl_lock();
tap = tap_get_tap_dev(q);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index fed85447701a..4f2105d1e6f1 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -185,7 +185,7 @@ struct tun_struct {
struct net_device   *dev;
netdev_features_t   set_features;
  #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
- NETIF_F_TSO6)
+ NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4)
  
  	int			align;

int vnet_hdr_sz;
@@ -2821,6 +2821,12 @@ static int set_offload(struct tun_struct *tun, unsigned 
long arg)
}
  
  		arg &= ~TUN_F_UFO;

+
+   /* TODO: for now USO4 and USO6 should work simultaneously */
+   if (arg & TUN_F_USO4 && arg & TUN_F_USO6) {
+   features |= NETIF_F_GSO_UDP_L4;
+   arg &= ~(TUN_F_USO4 | TUN_F_USO6);
+   }
}
  
  	/* This gives the user a way to test for new features in future by

@@ -2991,6 +2997,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
int sndbuf;
int vnet_hdr_sz;
int le;
+   unsigned int supported_offloads;
int ret;
bool do_notify = false;
  
@@ -3154,6 +3161,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,

case TUNSETOFFLOAD:
ret = set_offload(tun, arg);
break;
+   case TUNGETSUPPORTEDOFFLOADS:
+   supported_offloads = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
+   TUN_F_TSO_ECN | TUN_F_UFO | TUN_F_USO4 | TUN_F_USO6;
+   if (copy_to_user(argp, &supported_offloads, sizeof(supported_offloads)))
+   ret = -EFAULT;
+   break;
  
  	case TUNSETTXFILTER:

/* Can be set only for TAPs */



Re: [RFC PATCH 1/5] uapi/linux/if_tun.h: Added new ioctl for tun/tap.

2022-02-08 Thread Jason Wang


On 2022/1/25 4:46 PM, Andrew Melnychenko wrote:

Added TUNGETSUPPORTEDOFFLOADS, which should allow
getting the bits of supported offloads.



So we haven't used dedicated ioctls in the past; instead, we just probe
by checking the return value of TUNSETOFFLOAD.


E.g qemu has the following codes:

int tap_probe_has_ufo(int fd)
{
    unsigned offload;

    offload = TUN_F_CSUM | TUN_F_UFO;

    if (ioctl(fd, TUNSETOFFLOAD, offload) < 0)
    return 0;

    return 1;
}

Any reason we can't keep using that?

Thanks



Added 2 additional offloads for USO (IPv4 & IPv6).
Separate offloads are required for Windows VM guests,
e.g. Windows may set USO rx only for IPv4.

Signed-off-by: Andrew Melnychenko 
---
  include/uapi/linux/if_tun.h | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index 454ae31b93c7..07680fae6e18 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -61,6 +61,7 @@
  #define TUNSETFILTEREBPF _IOR('T', 225, int)
  #define TUNSETCARRIER _IOW('T', 226, int)
  #define TUNGETDEVNETNS _IO('T', 227)
+#define TUNGETSUPPORTEDOFFLOADS _IOR('T', 228, unsigned int)
  
  /* TUNSETIFF ifr flags */

  #define IFF_TUN   0x0001
@@ -88,6 +89,8 @@
  #define TUN_F_TSO6    0x04    /* I can handle TSO for IPv6 packets */
  #define TUN_F_TSO_ECN 0x08    /* I can handle TSO with ECN bits. */
  #define TUN_F_UFO     0x10    /* I can handle UFO packets */
+#define TUN_F_USO4     0x20    /* I can handle USO for IPv4 packets */
+#define TUN_F_USO6     0x40    /* I can handle USO for IPv6 packets */
  
  /* Protocol info prepended to the packets (when IFF_NO_PI is not set) */

  #define TUN_PKT_STRIP 0x0001
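
Presumably the probing pattern Jason quotes above extends to the new bits
without a dedicated ioctl; a sketch in the same style (assumes the TUN_F_USO*
flags from this patch; a kernel without them rejects the unknown bits, so the
TUNSETOFFLOAD return value is the probe result):

int tap_probe_has_uso(int fd)
{
    unsigned offload = TUN_F_CSUM | TUN_F_USO4 | TUN_F_USO6;

    if (ioctl(fd, TUNSETOFFLOAD, offload) < 0)
        return 0;

    return 1;
}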



Re: one question about your patch "vhost: log dirty page correctly"

2022-02-08 Thread Jason Wang
Adding lists.

On Wed, Feb 9, 2022 at 4:58 AM Michael S. Tsirkin  wrote:
>
> On Tue, Feb 08, 2022 at 11:37:00AM -0800, Xingyu Li wrote:
> > Hi Jason,
> >
> > Sorry to disturb you. I have a question about your Linux kernel
> > patch 1981e4c9a97 ("vhost: log dirty page correctly"). I noted that the
> > commit message says that the Fixes tag of this patch is 6b1e6cc7855b
> > ("vhost: new device IOTLB API"). It also says that the bug occurred when
> > trying to log GIOVA. However, when I look at the commit diff of
> > 6b1e6cc7855b, I do not find related changes to vhost_log_write() or
> > other log-related functions. Can you give me some guidance on this?

So the logic is: 6b1e6cc7855b doesn't touch vhost_log_write(), so the
log is wrong, since vhost_log_write() works on GPA. That's why I
wrote 1981e4c9a97, where I translated GIOVA to GPA before doing the
log.
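
Schematically, the fix walks the IOTLB and logs by GPA (a sketch with
hypothetical helper names and map layout, not the actual 1981e4c9a97 code):

/* Sketch: log 'len' bytes written at guest IO virtual address 'giova'.
 * Each IOTLB entry maps a contiguous GIOVA range to a GPA range, so the
 * write is split per entry and logged by GPA, not by GIOVA. */
static int log_write_giova(struct vhost_virtqueue *vq, u64 giova, u64 len)
{
        while (len) {
                struct iotlb_map *map = iotlb_lookup(vq, giova); /* assumed */
                u64 chunk;

                if (!map)
                        return -EFAULT;
                chunk = min(len, map->last - giova + 1);
                log_write_gpa(vq, map->gpa + (giova - map->start), chunk); /* assumed */
                giova += chunk;
                len -= chunk;
        }
        return 0;
}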

> >
> > --
> > Yours sincerely,
> > Xingyu
>
> Pls CC mailing list on questions. I have a policy against replying
> off list.

Right.

Thanks

>
> Thanks!
>
> --
> MST
>



Re: [PATCH v3 4/4] drivers/net/virtio_net: Added RSS hash report control.

2022-02-08 Thread Willem de Bruijn
On Tue, Feb 8, 2022 at 1:19 PM Andrew Melnychenko  wrote:
>
> Now it's possible to control supported hashflows.
> Added hashflow set/get callbacks.
> Also, disabling RXH_IP_SRC/DST for TCP would disable them for UDP.

I don't follow this comment. Can you elaborate?

> TCP and UDP support only:
> ethtool -U eth0 rx-flow-hash tcp4 sd
> RXH_IP_SRC + RXH_IP_DST
> ethtool -U eth0 rx-flow-hash tcp4 sdfn
> RXH_IP_SRC + RXH_IP_DST + RXH_L4_B_0_1 + RXH_L4_B_2_3
>
> Signed-off-by: Andrew Melnychenko 
> ---
>  drivers/net/virtio_net.c | 141 ++-
>  1 file changed, 140 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 543da2fbdd2d..88759d5e693c 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -231,6 +231,7 @@ struct virtnet_info {
> u8 rss_key_size;
> u16 rss_indir_table_size;
> u32 rss_hash_types_supported;
> +   u32 rss_hash_types_saved;

hash_types_active?

> +static bool virtnet_set_hashflow(struct virtnet_info *vi, struct 
> ethtool_rxnfc *info)
> +{
> +   u32 new_hashtypes = vi->rss_hash_types_saved;
> +   bool is_disable = info->data & RXH_DISCARD;
> +   bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | 
> RXH_L4_B_2_3);
> +
> +   /* supports only 'sd', 'sdfn' and 'r' */
> +   if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable))

maybe add an is_l3

> +   return false;
> +
> +   switch (info->flow_type) {
> +   case TCP_V4_FLOW:
> +   new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | 
> VIRTIO_NET_RSS_HASH_TYPE_TCPv4);
> +   if (!is_disable)
> +   new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
> +   | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 
> 0);
> +   break;
> +   case UDP_V4_FLOW:
> +   new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | 
> VIRTIO_NET_RSS_HASH_TYPE_UDPv4);
> +   if (!is_disable)
> +   new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
> +   | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 
> 0);
> +   break;
> +   case IPV4_FLOW:
> +   new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4;
> +   if (!is_disable)
> +   new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4;
> +   break;
> +   case TCP_V6_FLOW:
> +   new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | 
> VIRTIO_NET_RSS_HASH_TYPE_TCPv6);
> +   if (!is_disable)
> +   new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
> +   | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 
> 0);
> +   break;
> +   case UDP_V6_FLOW:
> +   new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | 
> VIRTIO_NET_RSS_HASH_TYPE_UDPv6);
> +   if (!is_disable)
> +   new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
> +   | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 
> 0);
> +   break;
> +   case IPV6_FLOW:
> +   new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6;
> +   if (!is_disable)
> +   new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6;
> +   break;
> +   default:
> +   /* unsupported flow */
> +   return false;
> +   }
> +
> +   /* if unsupported hashtype was set */
> +   if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported))
> +   return false;
> +
> +   if (new_hashtypes != vi->rss_hash_types_saved) {
> +   vi->rss_hash_types_saved = new_hashtypes;

should only be updated if the commit function returned success?
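
i.e., something along these lines (a sketch of the suggested ordering, not
code from the series):

vi->ctrl->rss.hash_types = new_hashtypes;
if (!virtnet_commit_rss_command(vi))
        return false;   /* leave rss_hash_types_saved untouched */
vi->rss_hash_types_saved = new_hashtypes;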


Re: [PATCH v3 3/4] drivers/net/virtio_net: Added RSS hash report.

2022-02-08 Thread Willem de Bruijn
On Tue, Feb 8, 2022 at 1:19 PM Andrew Melnychenko  wrote:
>
> Added features for RSS hash report.
> If a hash is provided, it is set on the skb.
> Added checks for whether RSS and/or hash report are enabled together.
>
> Signed-off-by: Andrew Melnychenko 
> ---
>  drivers/net/virtio_net.c | 51 ++--
>  1 file changed, 44 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 495aed524e33..543da2fbdd2d 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -227,6 +227,7 @@ struct virtnet_info {
>
> /* Host supports rss and/or hash report */
> bool has_rss;
> +   bool has_rss_hash_report;
> u8 rss_key_size;
> u16 rss_indir_table_size;
> u32 rss_hash_types_supported;
> @@ -421,7 +422,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info 
> *vi,
>
> hdr_len = vi->hdr_len;
> if (vi->mergeable_rx_bufs)
> -   hdr_padded_len = sizeof(*hdr);
> +   hdr_padded_len = hdr_len;

Belongs in patch 1?

> else
> hdr_padded_len = sizeof(struct padded_vnet_hdr);
>
> @@ -1156,6 +1157,8 @@ static void receive_buf(struct virtnet_info *vi, struct 
> receive_queue *rq,
> struct net_device *dev = vi->dev;
> struct sk_buff *skb;
> struct virtio_net_hdr_mrg_rxbuf *hdr;
> +   struct virtio_net_hdr_v1_hash *hdr_hash;
> +   enum pkt_hash_types rss_hash_type;
>
> if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
> pr_debug("%s: short packet %i\n", dev->name, len);
> @@ -1182,6 +1185,29 @@ static void receive_buf(struct virtnet_info *vi, 
> struct receive_queue *rq,
> return;
>
> hdr = skb_vnet_hdr(skb);
> +   if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) {

Can the first be true if the second is not?

> +   hdr_hash = (struct virtio_net_hdr_v1_hash *)(hdr);
> +
> +   switch (hdr_hash->hash_report) {
> +   case VIRTIO_NET_HASH_REPORT_TCPv4:
> +   case VIRTIO_NET_HASH_REPORT_UDPv4:
> +   case VIRTIO_NET_HASH_REPORT_TCPv6:
> +   case VIRTIO_NET_HASH_REPORT_UDPv6:
> +   case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
> +   case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
> +   rss_hash_type = PKT_HASH_TYPE_L4;
> +   break;
> +   case VIRTIO_NET_HASH_REPORT_IPv4:
> +   case VIRTIO_NET_HASH_REPORT_IPv6:
> +   case VIRTIO_NET_HASH_REPORT_IPv6_EX:
> +   rss_hash_type = PKT_HASH_TYPE_L3;
> +   break;
> +   case VIRTIO_NET_HASH_REPORT_NONE:
> +   default:
> +   rss_hash_type = PKT_HASH_TYPE_NONE;
> +   }
> +   skb_set_hash(skb, hdr_hash->hash_value, rss_hash_type);
> +   }

so many lines, perhaps deserves a helper function
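
One possible shape for such a helper (a sketch of the suggested refactor,
not code from the posted series):

static void virtnet_set_skb_hash(const struct virtio_net_hdr_v1_hash *hdr_hash,
                                 struct sk_buff *skb)
{
        enum pkt_hash_types rss_hash_type;

        /* Map the device's hash_report to the kernel's hash type. */
        switch (hdr_hash->hash_report) {
        case VIRTIO_NET_HASH_REPORT_TCPv4:
        case VIRTIO_NET_HASH_REPORT_UDPv4:
        case VIRTIO_NET_HASH_REPORT_TCPv6:
        case VIRTIO_NET_HASH_REPORT_UDPv6:
        case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
        case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
                rss_hash_type = PKT_HASH_TYPE_L4;
                break;
        case VIRTIO_NET_HASH_REPORT_IPv4:
        case VIRTIO_NET_HASH_REPORT_IPv6:
        case VIRTIO_NET_HASH_REPORT_IPv6_EX:
                rss_hash_type = PKT_HASH_TYPE_L3;
                break;
        case VIRTIO_NET_HASH_REPORT_NONE:
        default:
                rss_hash_type = PKT_HASH_TYPE_NONE;
        }
        skb_set_hash(skb, hdr_hash->hash_value, rss_hash_type);
}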

>
> if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
> skb->ip_summed = CHECKSUM_UNNECESSARY;
> @@ -2232,7 +2258,8 @@ static bool virtnet_commit_rss_command(struct 
> virtnet_info *vi)
> sg_set_buf([3], vi->ctrl->rss.key, sg_buf_size);
>
> if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
> - VIRTIO_NET_CTRL_MQ_RSS_CONFIG, sgs)) {
> + vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG
> + : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) {
> dev_warn(>dev, "VIRTIONET issue with committing RSS 
> sgs\n");
> return false;
> }
> @@ -3230,6 +3257,8 @@ static bool virtnet_validate_features(struct 
> virtio_device *vdev)
>  VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
>  "VIRTIO_NET_F_CTRL_VQ") ||
>  VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
> +"VIRTIO_NET_F_CTRL_VQ") ||
> +VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
>  "VIRTIO_NET_F_CTRL_VQ"))) {
> return false;
> }
> @@ -3365,8 +3394,13 @@ static int virtnet_probe(struct virtio_device *vdev)
> if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
> vi->mergeable_rx_bufs = true;
>
> -   if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
> +   if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
> +   vi->has_rss_hash_report = true;
> +
> +   if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
> vi->has_rss = true;
> +
> +   if (vi->has_rss || vi->has_rss_hash_report) {
> vi->rss_indir_table_size =
> virtio_cread16(vdev, offsetof(struct 
> virtio_net_config,

should indir table size be zero if only hash report is enabled?

> rss_max_indirection_table_length));
> @@ -3382,8 +3416,11 @@ static 

Re: [PATCH v3 2/4] drivers/net/virtio_net: Added basic RSS support.

2022-02-08 Thread Willem de Bruijn
On Tue, Feb 8, 2022 at 1:19 PM Andrew Melnychenko  wrote:
>
> Added features for RSS.
> Added initialization, RXHASH feature and ethtool ops.
> By default RSS/RXHASH is disabled.
> Virtio RSS "IPv6 extensions" hashes disabled.
> Added ethtool ops to set the key and indirection table.
>
> Signed-off-by: Andrew Melnychenko 
> ---
>  drivers/net/virtio_net.c | 191 +--
>  1 file changed, 185 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 1404e683a2fd..495aed524e33 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -169,6 +169,24 @@ struct receive_queue {
> struct xdp_rxq_info xdp_rxq;
>  };
>
> +/* This structure can hold an RSS message with the maximum settings for the
> + * indirection table and key size.
> + * Note that the default structure describing the RSS configuration,
> + * virtio_net_rss_config, contains the same info but can't hold the table
> + * values.
> + * In any case, the structure is passed to the virtio device through sg_buf,
> + * split into parts because table sizes may differ according to the device
> + * configuration.
> + */
> +#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40

Future proof, may want to support larger sizes.

netdevice.h defines NETDEV_RSS_KEY_LEN at 52.

tools/testing/selftests/net/toeplitz.c supports up to 60

> +#define VIRTIO_NET_RSS_MAX_TABLE_LEN 128
> +struct virtio_net_ctrl_rss {
> +   u32 hash_types;

conversely, u32 is a bit extreme?

> +   u16 indirection_table_mask;
> +   u16 unclassified_queue;
> +   u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN];
> +   u16 max_tx_vq;
> +   u8 hash_key_length;
> +   u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE];
> +};
> +
>  /* Control VQ buffers: protected by the rtnl lock */
>  struct control_buf {
> struct virtio_net_ctrl_hdr hdr;
> @@ -178,6 +196,7 @@ struct control_buf {
> u8 allmulti;
> __virtio16 vid;
> __virtio64 offloads;
> +   struct virtio_net_ctrl_rss rss;
>  };
>
>  struct virtnet_info {
> @@ -206,6 +225,12 @@ struct virtnet_info {
> /* Host will merge rx buffers for big packets (shake it! shake it!) */
> bool mergeable_rx_bufs;
>
> +   /* Host supports rss and/or hash report */
> +   bool has_rss;
> +   u8 rss_key_size;
> +   u16 rss_indir_table_size;
> +   u32 rss_hash_types_supported;
> +
> /* Has control virtqueue */
> bool has_cvq;
>
> @@ -2184,6 +2209,56 @@ static void virtnet_get_ringparam(struct net_device 
> *dev,
> ring->tx_pending = ring->tx_max_pending;
>  }
>
> +static bool virtnet_commit_rss_command(struct virtnet_info *vi)
> +{
> +   struct net_device *dev = vi->dev;
> +   struct scatterlist sgs[4];
> +   unsigned int sg_buf_size;
> +
> +   /* prepare sgs */
> +   sg_init_table(sgs, 4);
> +
> +   sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table);
> +   sg_set_buf(&sgs[0], &vi->ctrl->rss, sg_buf_size);
> +
> +   sg_buf_size = sizeof(uint16_t) * vi->rss_indir_table_size;
> +   sg_set_buf(&sgs[1], vi->ctrl->rss.indirection_table, sg_buf_size);
> +
> +   sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key)
> +   - offsetof(struct virtio_net_ctrl_rss, max_tx_vq);
> +   sg_set_buf(&sgs[2], &vi->ctrl->rss.max_tx_vq, sg_buf_size);
> +
> +   sg_buf_size = vi->rss_key_size;
> +   sg_set_buf(&sgs[3], vi->ctrl->rss.key, sg_buf_size);
> +
> +   if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
> + VIRTIO_NET_CTRL_MQ_RSS_CONFIG, sgs)) {
> +   dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n");
> +   return false;
> +   }
> +   return true;
> +}
> +
> +static void virtnet_init_default_rss(struct virtnet_info *vi)
> +{
> +   u32 indir_val = 0;
> +   int i = 0;
> +
> +   vi->ctrl->rss.hash_types = vi->rss_hash_types_supported;
> +   vi->ctrl->rss.indirection_table_mask = vi->rss_indir_table_size - 1;

Is table size always a power of two?

> +   vi->ctrl->rss.unclassified_queue = 0;
> +
> +   for (; i < vi->rss_indir_table_size; ++i) {
> +   indir_val = ethtool_rxfh_indir_default(i, 
> vi->curr_queue_pairs);
> +   vi->ctrl->rss.indirection_table[i] = indir_val;
> +   }
> +
> +   vi->ctrl->rss.max_tx_vq = vi->curr_queue_pairs;
> +   vi->ctrl->rss.hash_key_length = vi->rss_key_size;
> +
> +   netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size);
> +}
> +
>
>  static void virtnet_get_drvinfo(struct net_device *dev,
> struct ethtool_drvinfo *info)
> @@ -2412,6 +2487,71 @@ static void virtnet_update_settings(struct 
> virtnet_info *vi)
> vi->duplex = duplex;
>  }
>
> +static u32 virtnet_get_rxfh_key_size(struct net_device *dev)
> +{
> +   return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size;
> +}
> +
> +static u32 

[PATCH v3 2/4] drivers/net/virtio_net: Added basic RSS support.

2022-02-08 Thread Andrew Melnychenko
Added features for RSS.
Added initialization, RXHASH feature and ethtool ops.
By default RSS/RXHASH is disabled.
Virtio RSS "IPv6 extensions" hashes disabled.
Added ethtool ops to set the key and indirection table.

Signed-off-by: Andrew Melnychenko 
---
 drivers/net/virtio_net.c | 191 +--
 1 file changed, 185 insertions(+), 6 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1404e683a2fd..495aed524e33 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -169,6 +169,24 @@ struct receive_queue {
struct xdp_rxq_info xdp_rxq;
 };
 
+/* This structure can hold an RSS message with the maximum settings for the
+ * indirection table and key size.
+ * Note that the default structure describing the RSS configuration,
+ * virtio_net_rss_config, contains the same info but can't hold the table
+ * values.
+ * In any case, the structure is passed to the virtio device through sg_buf,
+ * split into parts because table sizes may differ according to the device
+ * configuration.
+ */
+#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40
+#define VIRTIO_NET_RSS_MAX_TABLE_LEN 128
+struct virtio_net_ctrl_rss {
+   u32 hash_types;
+   u16 indirection_table_mask;
+   u16 unclassified_queue;
+   u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN];
+   u16 max_tx_vq;
+   u8 hash_key_length;
+   u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE];
+};
+
 /* Control VQ buffers: protected by the rtnl lock */
 struct control_buf {
struct virtio_net_ctrl_hdr hdr;
@@ -178,6 +196,7 @@ struct control_buf {
u8 allmulti;
__virtio16 vid;
__virtio64 offloads;
+   struct virtio_net_ctrl_rss rss;
 };
 
 struct virtnet_info {
@@ -206,6 +225,12 @@ struct virtnet_info {
/* Host will merge rx buffers for big packets (shake it! shake it!) */
bool mergeable_rx_bufs;
 
+   /* Host supports rss and/or hash report */
+   bool has_rss;
+   u8 rss_key_size;
+   u16 rss_indir_table_size;
+   u32 rss_hash_types_supported;
+
/* Has control virtqueue */
bool has_cvq;
 
@@ -2184,6 +2209,56 @@ static void virtnet_get_ringparam(struct net_device *dev,
ring->tx_pending = ring->tx_max_pending;
 }
 
+static bool virtnet_commit_rss_command(struct virtnet_info *vi)
+{
+   struct net_device *dev = vi->dev;
+   struct scatterlist sgs[4];
+   unsigned int sg_buf_size;
+
+   /* prepare sgs */
+   sg_init_table(sgs, 4);
+
+   sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table);
+   sg_set_buf(&sgs[0], &vi->ctrl->rss, sg_buf_size);
+
+   sg_buf_size = sizeof(uint16_t) * vi->rss_indir_table_size;
+   sg_set_buf(&sgs[1], vi->ctrl->rss.indirection_table, sg_buf_size);
+
+   sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key)
+   - offsetof(struct virtio_net_ctrl_rss, max_tx_vq);
+   sg_set_buf(&sgs[2], &vi->ctrl->rss.max_tx_vq, sg_buf_size);
+
+   sg_buf_size = vi->rss_key_size;
+   sg_set_buf(&sgs[3], vi->ctrl->rss.key, sg_buf_size);
+
+   if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
+ VIRTIO_NET_CTRL_MQ_RSS_CONFIG, sgs)) {
+   dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n");
+   return false;
+   }
+   return true;
+}
+
+static void virtnet_init_default_rss(struct virtnet_info *vi)
+{
+   u32 indir_val = 0;
+   int i = 0;
+
+   vi->ctrl->rss.hash_types = vi->rss_hash_types_supported;
+   vi->ctrl->rss.indirection_table_mask = vi->rss_indir_table_size - 1;
+   vi->ctrl->rss.unclassified_queue = 0;
+
+   for (; i < vi->rss_indir_table_size; ++i) {
+   indir_val = ethtool_rxfh_indir_default(i, vi->curr_queue_pairs);
+   vi->ctrl->rss.indirection_table[i] = indir_val;
+   }
+
+   vi->ctrl->rss.max_tx_vq = vi->curr_queue_pairs;
+   vi->ctrl->rss.hash_key_length = vi->rss_key_size;
+
+   netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size);
+}
+
 
 static void virtnet_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
@@ -2412,6 +2487,71 @@ static void virtnet_update_settings(struct virtnet_info 
*vi)
vi->duplex = duplex;
 }
 
+static u32 virtnet_get_rxfh_key_size(struct net_device *dev)
+{
+   return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size;
+}
+
+static u32 virtnet_get_rxfh_indir_size(struct net_device *dev)
+{
+   return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size;
+}
+
+static int virtnet_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 
*hfunc)
+{
+   struct virtnet_info *vi = netdev_priv(dev);
+   int i;
+
+   if (indir) {
+   for (i = 0; i < vi->rss_indir_table_size; ++i)
+   indir[i] = vi->ctrl->rss.indirection_table[i];
+   }
+
+   if (key)
+   memcpy(key, vi->ctrl->rss.key, vi->rss_key_size);
+
+   if (hfunc)
+  

[PATCH v3 4/4] drivers/net/virtio_net: Added RSS hash report control.

2022-02-08 Thread Andrew Melnychenko
Now it's possible to control supported hashflows.
Added hashflow set/get callbacks.
Also, disabling RXH_IP_SRC/DST for TCP would disable them for UDP.
TCP and UDP support only:
ethtool -U eth0 rx-flow-hash tcp4 sd
RXH_IP_SRC + RXH_IP_DST
ethtool -U eth0 rx-flow-hash tcp4 sdfn
RXH_IP_SRC + RXH_IP_DST + RXH_L4_B_0_1 + RXH_L4_B_2_3

Signed-off-by: Andrew Melnychenko 
---
 drivers/net/virtio_net.c | 141 ++-
 1 file changed, 140 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 543da2fbdd2d..88759d5e693c 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -231,6 +231,7 @@ struct virtnet_info {
u8 rss_key_size;
u16 rss_indir_table_size;
u32 rss_hash_types_supported;
+   u32 rss_hash_types_saved;
 
/* Has control virtqueue */
bool has_cvq;
@@ -2272,6 +2273,7 @@ static void virtnet_init_default_rss(struct virtnet_info 
*vi)
int i = 0;
 
vi->ctrl->rss.hash_types = vi->rss_hash_types_supported;
+   vi->rss_hash_types_saved = vi->rss_hash_types_supported;
vi->ctrl->rss.indirection_table_mask = vi->rss_indir_table_size - 1;
vi->ctrl->rss.unclassified_queue = 0;
 
@@ -2286,6 +2288,121 @@ static void virtnet_init_default_rss(struct 
virtnet_info *vi)
netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size);
 }
 
+static void virtnet_get_hashflow(const struct virtnet_info *vi, struct 
ethtool_rxnfc *info)
+{
+   info->data = 0;
+   switch (info->flow_type) {
+   case TCP_V4_FLOW:
+   if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) {
+   info->data = RXH_IP_SRC | RXH_IP_DST |
+RXH_L4_B_0_1 | RXH_L4_B_2_3;
+   } else if (vi->rss_hash_types_saved & 
VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
+   info->data = RXH_IP_SRC | RXH_IP_DST;
+   }
+   break;
+   case TCP_V6_FLOW:
+   if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) {
+   info->data = RXH_IP_SRC | RXH_IP_DST |
+RXH_L4_B_0_1 | RXH_L4_B_2_3;
+   } else if (vi->rss_hash_types_saved & 
VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
+   info->data = RXH_IP_SRC | RXH_IP_DST;
+   }
+   break;
+   case UDP_V4_FLOW:
+   if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) {
+   info->data = RXH_IP_SRC | RXH_IP_DST |
+RXH_L4_B_0_1 | RXH_L4_B_2_3;
+   } else if (vi->rss_hash_types_saved & 
VIRTIO_NET_RSS_HASH_TYPE_IPv4) {
+   info->data = RXH_IP_SRC | RXH_IP_DST;
+   }
+   break;
+   case UDP_V6_FLOW:
+   if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) {
+   info->data = RXH_IP_SRC | RXH_IP_DST |
+RXH_L4_B_0_1 | RXH_L4_B_2_3;
+   } else if (vi->rss_hash_types_saved & 
VIRTIO_NET_RSS_HASH_TYPE_IPv6) {
+   info->data = RXH_IP_SRC | RXH_IP_DST;
+   }
+   break;
+   case IPV4_FLOW:
+   if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4)
+   info->data = RXH_IP_SRC | RXH_IP_DST;
+
+   break;
+   case IPV6_FLOW:
+   if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6)
+   info->data = RXH_IP_SRC | RXH_IP_DST;
+
+   break;
+   default:
+   info->data = 0;
+   break;
+   }
+}
+
+static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc 
*info)
+{
+   u32 new_hashtypes = vi->rss_hash_types_saved;
+   bool is_disable = info->data & RXH_DISCARD;
+   bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | 
RXH_L4_B_2_3);
+
+   /* supports only 'sd', 'sdfn' and 'r' */
+   if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable))
+   return false;
+
+   switch (info->flow_type) {
+   case TCP_V4_FLOW:
+   new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | 
VIRTIO_NET_RSS_HASH_TYPE_TCPv4);
+   if (!is_disable)
+   new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
+   | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0);
+   break;
+   case UDP_V4_FLOW:
+   new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | 
VIRTIO_NET_RSS_HASH_TYPE_UDPv4);
+   if (!is_disable)
+   new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
+   | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0);
+   break;
+   case IPV4_FLOW:
+   

[PATCH v3 3/4] drivers/net/virtio_net: Added RSS hash report.

2022-02-08 Thread Andrew Melnychenko
Added features for RSS hash report.
If a hash is provided, it is set on the skb.
Added checks for whether RSS and/or hash report are enabled together.

Signed-off-by: Andrew Melnychenko 
---
 drivers/net/virtio_net.c | 51 ++--
 1 file changed, 44 insertions(+), 7 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 495aed524e33..543da2fbdd2d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -227,6 +227,7 @@ struct virtnet_info {
 
/* Host supports rss and/or hash report */
bool has_rss;
+   bool has_rss_hash_report;
u8 rss_key_size;
u16 rss_indir_table_size;
u32 rss_hash_types_supported;
@@ -421,7 +422,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 
hdr_len = vi->hdr_len;
if (vi->mergeable_rx_bufs)
-   hdr_padded_len = sizeof(*hdr);
+   hdr_padded_len = hdr_len;
else
hdr_padded_len = sizeof(struct padded_vnet_hdr);
 
@@ -1156,6 +1157,8 @@ static void receive_buf(struct virtnet_info *vi, struct 
receive_queue *rq,
struct net_device *dev = vi->dev;
struct sk_buff *skb;
struct virtio_net_hdr_mrg_rxbuf *hdr;
+   struct virtio_net_hdr_v1_hash *hdr_hash;
+   enum pkt_hash_types rss_hash_type;
 
if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
pr_debug("%s: short packet %i\n", dev->name, len);
@@ -1182,6 +1185,29 @@ static void receive_buf(struct virtnet_info *vi, struct 
receive_queue *rq,
return;
 
hdr = skb_vnet_hdr(skb);
+   if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) {
+   hdr_hash = (struct virtio_net_hdr_v1_hash *)(hdr);
+
+   switch (hdr_hash->hash_report) {
+   case VIRTIO_NET_HASH_REPORT_TCPv4:
+   case VIRTIO_NET_HASH_REPORT_UDPv4:
+   case VIRTIO_NET_HASH_REPORT_TCPv6:
+   case VIRTIO_NET_HASH_REPORT_UDPv6:
+   case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
+   case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
+   rss_hash_type = PKT_HASH_TYPE_L4;
+   break;
+   case VIRTIO_NET_HASH_REPORT_IPv4:
+   case VIRTIO_NET_HASH_REPORT_IPv6:
+   case VIRTIO_NET_HASH_REPORT_IPv6_EX:
+   rss_hash_type = PKT_HASH_TYPE_L3;
+   break;
+   case VIRTIO_NET_HASH_REPORT_NONE:
+   default:
+   rss_hash_type = PKT_HASH_TYPE_NONE;
+   }
+   skb_set_hash(skb, hdr_hash->hash_value, rss_hash_type);
+   }
 
if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -2232,7 +2258,8 @@ static bool virtnet_commit_rss_command(struct 
virtnet_info *vi)
sg_set_buf([3], vi->ctrl->rss.key, sg_buf_size);
 
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
- VIRTIO_NET_CTRL_MQ_RSS_CONFIG, sgs)) {
+ vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG
+ : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) {
dev_warn(>dev, "VIRTIONET issue with committing RSS 
sgs\n");
return false;
}
@@ -3230,6 +3257,8 @@ static bool virtnet_validate_features(struct 
virtio_device *vdev)
 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
 "VIRTIO_NET_F_CTRL_VQ") ||
 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
+"VIRTIO_NET_F_CTRL_VQ") ||
+VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
 "VIRTIO_NET_F_CTRL_VQ"))) {
return false;
}
@@ -3365,8 +3394,13 @@ static int virtnet_probe(struct virtio_device *vdev)
if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
vi->mergeable_rx_bufs = true;
 
-   if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
+   if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
+   vi->has_rss_hash_report = true;
+
+   if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
vi->has_rss = true;
+
+   if (vi->has_rss || vi->has_rss_hash_report) {
vi->rss_indir_table_size =
virtio_cread16(vdev, offsetof(struct virtio_net_config,
rss_max_indirection_table_length));
@@ -3382,8 +3416,11 @@ static int virtnet_probe(struct virtio_device *vdev)
 
dev->hw_features |= NETIF_F_RXHASH;
}
-   if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
-   virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
+
+   if (vi->has_rss_hash_report)
+   vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);
+   else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
+virtio_has_feature(vdev, 

[PATCH v3 1/4] drivers/net/virtio_net: Fixed padded vheader to use v1 with hash.

2022-02-08 Thread Andrew Melnychenko
The v1 header provides additional info about RSS.
Added changes to compute the proper header length.
In the next patches, the header may contain RSS hash info
for populating the hash.

Signed-off-by: Andrew Melnychenko 
---
 drivers/net/virtio_net.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index a801ea40908f..1404e683a2fd 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -242,13 +242,13 @@ struct virtnet_info {
 };
 
 struct padded_vnet_hdr {
-   struct virtio_net_hdr_mrg_rxbuf hdr;
+   struct virtio_net_hdr_v1_hash hdr;
/*
 * hdr is in a separate sg buffer, and data sg buffer shares same page
 * with this header sg. This padding makes next sg 16 byte aligned
 * after the header.
 */
-   char padding[4];
+   char padding[12];
 };
 
 static bool is_xdp_frame(void *ptr)
@@ -1266,7 +1266,8 @@ static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
  struct ewma_pkt_len *avg_pkt_len,
  unsigned int room)
 {
-   const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+   struct virtnet_info *vi = rq->vq->vdev->priv;
+   const size_t hdr_len = vi->hdr_len;
unsigned int len;
 
if (room)
@@ -2851,7 +2852,7 @@ static void virtnet_del_vqs(struct virtnet_info *vi)
  */
static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
 {
-   const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+   const unsigned int hdr_len = vi->hdr_len;
unsigned int rq_size = virtqueue_get_vring_size(vq);
unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
-- 
2.34.1
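For reference, the alignment this padding change targets can be checked with
a couple of static asserts (a minimal standalone sketch; the struct sizes are
assumed from the uapi virtio_net definitions, which put
virtio_net_hdr_mrg_rxbuf at 12 bytes and virtio_net_hdr_v1_hash at 20 bytes):

    #include <linux/virtio_net.h>

    /* 12 + 4 = 16 for the old header; 20 + 12 = 32 for the v1 hash header,
     * so the sg following the header stays 16 byte aligned in both cases. */
    struct padded_vnet_hdr_sketch {
            struct virtio_net_hdr_v1_hash hdr;
            char padding[12];
    };

    _Static_assert(sizeof(struct virtio_net_hdr_mrg_rxbuf) == 12,
                   "legacy merged-rxbuf header");
    _Static_assert(sizeof(struct virtio_net_hdr_v1_hash) == 20,
                   "v1 hash header");
    _Static_assert(sizeof(struct padded_vnet_hdr_sketch) % 16 == 0,
                   "next sg must stay 16 byte aligned");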



[PATCH v3 0/4] RSS support for VirtioNet.

2022-02-08 Thread Andrew Melnychenko
Virtio-net supports "hardware" RSS with toeplitz key.
Also, it allows receiving calculated hash in vheader
that may be used with RPS.
Added ethtools callbacks to manipulate RSS.

Technically hash calculation may be set only for
SRC+DST and SRC+DST+PORTSRC+PORTDST hashflows.
The completely disabling hash calculation for TCP or UDP
would disable hash calculation for IP.

RSS/RXHASH is disabled by default.
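For reference, the Toeplitz hash the device computes over the selected flow
fields can be sketched as follows (an illustrative implementation of the
standard algorithm, not code from this series; the 40-byte key and the input
tuple layout follow the RSS convention):

    #include <stdint.h>
    #include <stddef.h>

    /* Toeplitz: for every set bit of the input (MSB first), XOR into the
     * hash the 32-bit window of the key starting at that bit position. */
    static uint32_t toeplitz_hash(const uint8_t *key, size_t key_len,
                                  const uint8_t *data, size_t data_len)
    {
            /* Sliding window over the key, refilled one byte per input byte;
             * stale high bits are truncated by the uint32_t cast below. */
            uint64_t window = ((uint64_t)key[0] << 24) |
                              ((uint64_t)key[1] << 16) |
                              ((uint64_t)key[2] << 8) | key[3];
            uint32_t hash = 0;
            size_t next = 4;

            for (size_t i = 0; i < data_len; i++, next++) {
                    window = (window << 8) |
                             (next < key_len ? key[next] : 0);
                    for (int bit = 7; bit >= 0; bit--) {
                            if (data[i] & (1u << bit))
                                    hash ^= (uint32_t)(window >> (bit + 1));
                    }
            }
            return hash;
    }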

Changes since v2:
* Fixed issue with calculating padded header length.
  During review/tests, an issue was found that
  would crash the kernel if VIRTIO_NET_F_MRG_RXBUF
  was not set. (thx to Jason Wang )
* Refactored the code according to review.

Changes since v1:
* Refactored virtnet_set_hashflow.
* Refactored virtio_net_ctrl_rss.
* Moved hunks between patches a bit.

Changes since rfc:
* Code refactored.
* Patches reformatted.
* Added feature validation.

Andrew Melnychenko (4):
  drivers/net/virtio_net: Fixed padded vheader to use v1 with hash.
  drivers/net/virtio_net: Added basic RSS support.
  drivers/net/virtio_net: Added RSS hash report.
  drivers/net/virtio_net: Added RSS hash report control.

 drivers/net/virtio_net.c | 382 +--
 1 file changed, 369 insertions(+), 13 deletions(-)

-- 
2.34.1



Re: [RFC PATCH 0/5] TUN/VirtioNet USO features support.

2022-02-08 Thread Andrew Melnichenko
Hi people,
Can you please review this series?

On Wed, Jan 26, 2022 at 10:32 AM Yuri Benditovich
 wrote:
>
> On Wed, Jan 26, 2022 at 9:54 AM Xuan Zhuo  wrote:
> >
> > On Tue, 25 Jan 2022 10:46:57 +0200, Andrew Melnychenko  
> > wrote:
> > > Added new offloads for TUN devices: TUN_F_USO4 and TUN_F_USO6.
> > > Technically they enable NETIF_F_GSO_UDP_L4
> > > (and only if USO4 & USO6 are set simultaneously).
> > > This allows transmission of large UDP packets.
> > >
> > > Separate USO4 and USO6 features are required for qemu, where Windows
> > > guests can enable/disable USO receives for IPv4 and IPv6 separately.
> > > On the other side, Linux can't really differentiate USO4 and USO6
> > > for now, so enabling USO for TUN requires enabling USO4 and USO6
> > > together. In the future, there will be a mechanism to control UDP_L4
> > > GSO separately.
> > >
> > > Test it with the WIP Qemu tree: https://github.com/daynix/qemu/tree/Dev_USOv2
> > >
> > > New types for VirtioNet already on mailing:
> > > https://lists.oasis-open.org/archives/virtio-comment/202110/msg00010.html
> >
> > Seems like this hasn't been upvoted yet.
> >
> > https://github.com/oasis-tcs/virtio-spec#use-of-github-issues
>
> Yes, correct. This is the reason why this series of patches is an RFC.
>
> >
> > Thanks.
> >
> > >
> > > Also, there is a known issue with transmitting packets between two
> > > guests. Without hacks with skb's GSO, packets are still segmented in
> > > the host's postrouting.
> > >
> > > Andrew Melnychenko (5):
> > >   uapi/linux/if_tun.h: Added new ioctl for tun/tap.
> > >   driver/net/tun: Added features for USO.
> > >   uapi/linux/virtio_net.h: Added USO types.
> > >   linux/virtio_net.h: Added Support for GSO_UDP_L4 offload.
> > >   drivers/net/virtio_net.c: Added USO support.
> > >
> > >  drivers/net/tap.c   | 18 --
> > >  drivers/net/tun.c   | 15 ++-
> > >  drivers/net/virtio_net.c| 22 ++
> > >  include/linux/virtio_net.h  | 11 +++
> > >  include/uapi/linux/if_tun.h |  3 +++
> > >  include/uapi/linux/virtio_net.h |  4 
> > >  6 files changed, 66 insertions(+), 7 deletions(-)
> > >
> > > --
> > > 2.34.1
> > >
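For illustration, enabling the new offloads from userspace would look roughly
like this (a hedged sketch: TUNSETOFFLOAD is the existing tun ioctl, while
TUN_F_USO4/TUN_F_USO6 are the flags proposed in this series, and per the
cover letter both must currently be set together):

    #include <sys/ioctl.h>
    #include <linux/if_tun.h>

    /* Request UDP segmentation offload on an open tun fd. */
    static int tun_enable_uso(int tun_fd)
    {
            unsigned long offloads = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
                                     TUN_F_USO4 | TUN_F_USO6;

            return ioctl(tun_fd, TUNSETOFFLOAD, offloads);
    }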


Re: [PATCH 06/31] vhost: Route guest->host notification through shadow virtqueue

2022-02-08 Thread Jason Wang


在 2022/1/31 下午7:33, Eugenio Perez Martin 写道:

On Fri, Jan 28, 2022 at 7:57 AM Jason Wang  wrote:


在 2022/1/22 上午4:27, Eugenio Pérez 写道:

At this moment no buffer forwarding is performed in SVQ mode: Qemu
just forwards the guest's kicks to the device. This commit also sets up
SVQs in the vhost device.

Host memory notifier regions are left out for simplicity, and they will
not be addressed in this series.
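The forwarding described above boils down to draining one eventfd and
re-raising another. A minimal sketch of that relay (illustrative only, using
plain file descriptors rather than QEMU's EventNotifier wrappers):

    #include <stdint.h>
    #include <unistd.h>

    /* Guest kicked svq_kick_fd: consume the event and forward the kick to
     * the eventfd the vhost device actually polls. */
    static void svq_forward_kick(int svq_kick_fd, int hdev_kick_fd)
    {
            uint64_t counter;

            if (read(svq_kick_fd, &counter, sizeof(counter)) ==
                sizeof(counter)) {
                    const uint64_t one = 1;

                    (void)write(hdev_kick_fd, &one, sizeof(one));
            }
    }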


I wonder if it's better to squash this into patch 5 since it gives us a
full guest->host forwarding.


I'm fine with that if you think it makes the review easier.



Yes please.





Signed-off-by: Eugenio Pérez 
---
   include/hw/virtio/vhost-vdpa.h |   4 ++
   hw/virtio/vhost-vdpa.c | 122 -
   2 files changed, 124 insertions(+), 2 deletions(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 3ce79a646d..009a9f3b6b 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -12,6 +12,8 @@
   #ifndef HW_VIRTIO_VHOST_VDPA_H
   #define HW_VIRTIO_VHOST_VDPA_H

+#include 
+
   #include "hw/virtio/virtio.h"
   #include "standard-headers/linux/vhost_types.h"

@@ -27,6 +29,8 @@ typedef struct vhost_vdpa {
   bool iotlb_batch_begin_sent;
   MemoryListener listener;
   struct vhost_vdpa_iova_range iova_range;
+bool shadow_vqs_enabled;
+GPtrArray *shadow_vqs;
   struct vhost_dev *dev;
   VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
   } VhostVDPA;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 6c10a7f05f..18de14f0fb 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -17,12 +17,14 @@
   #include "hw/virtio/vhost.h"
   #include "hw/virtio/vhost-backend.h"
   #include "hw/virtio/virtio-net.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
   #include "hw/virtio/vhost-vdpa.h"
   #include "exec/address-spaces.h"
   #include "qemu/main-loop.h"
   #include "cpu.h"
   #include "trace.h"
   #include "qemu-common.h"
+#include "qapi/error.h"

   /*
* Return one past the end of the end of section. Be careful with uint64_t
@@ -409,8 +411,14 @@ err:

   static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
   {
+struct vhost_vdpa *v = dev->opaque;
   int i;

+if (v->shadow_vqs_enabled) {
+/* SVQ is not compatible with host notifiers mr */


I guess there should be a TODO or FIXME here.


Sure I can add it.


+return;
+}
+
   for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
   if (vhost_vdpa_host_notifier_init(dev, i)) {
   goto err;
@@ -424,6 +432,17 @@ err:
   return;
   }

+static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
+{
+struct vhost_vdpa *v = dev->opaque;
+size_t idx;
+
+for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
+vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
+}
+g_ptr_array_free(v->shadow_vqs, true);
+}
+
   static int vhost_vdpa_cleanup(struct vhost_dev *dev)
   {
   struct vhost_vdpa *v;
@@ -432,6 +451,7 @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev)
   trace_vhost_vdpa_cleanup(dev, v);
   vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    memory_listener_unregister(&v->listener);
+vhost_vdpa_svq_cleanup(dev);

   dev->opaque = NULL;
   ram_block_discard_disable(false);
@@ -507,9 +527,15 @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev,

   static int vhost_vdpa_reset_device(struct vhost_dev *dev)
   {
+struct vhost_vdpa *v = dev->opaque;
   int ret;
   uint8_t status = 0;

+for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
+VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
+vhost_svq_stop(svq);
+}
+
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
   trace_vhost_vdpa_reset_device(dev, status);
   return ret;
@@ -639,13 +665,28 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
   return ret;
   }

-static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
-   struct vhost_vring_file *file)
+static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
+ struct vhost_vring_file *file)
   {
   trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
   return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
   }

+static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
+   struct vhost_vring_file *file)
+{
+struct vhost_vdpa *v = dev->opaque;
+int vdpa_idx = vhost_vdpa_get_vq_index(dev, file->index);
+
+if (v->shadow_vqs_enabled) {
+VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
+vhost_svq_set_svq_kick_fd(svq, file->fd);
+return 0;
+} else {
+return vhost_vdpa_set_vring_dev_kick(dev, file);
+}
+}
+
   static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
  

Re: [PATCH 04/31] vdpa: Add vhost_svq_set_svq_kick_fd

2022-02-08 Thread Jason Wang


在 2022/1/31 下午6:18, Eugenio Perez Martin 写道:

On Fri, Jan 28, 2022 at 7:29 AM Jason Wang  wrote:


在 2022/1/22 上午4:27, Eugenio Pérez 写道:

This function allows the vhost-vdpa backend to override kick_fd.

Signed-off-by: Eugenio Pérez 
---
   hw/virtio/vhost-shadow-virtqueue.h |  1 +
   hw/virtio/vhost-shadow-virtqueue.c | 45 ++
   2 files changed, 46 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index 400effd9f2..a56ecfc09d 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -15,6 +15,7 @@

   typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;

+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
const EventNotifier *vhost_svq_get_dev_kick_notifier(const VhostShadowVirtqueue *svq);

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index bd87110073..21534bc94d 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -11,6 +11,7 @@
   #include "hw/virtio/vhost-shadow-virtqueue.h"

   #include "qemu/error-report.h"
+#include "qemu/main-loop.h"

   /* Shadow virtqueue to relay notifications */
   typedef struct VhostShadowVirtqueue {
@@ -18,8 +19,20 @@ typedef struct VhostShadowVirtqueue {
   EventNotifier hdev_kick;
   /* Shadow call notifier, sent to vhost */
   EventNotifier hdev_call;
+
+/*
+ * Borrowed virtqueue's guest to host notifier.
+ * Borrowing it into this event notifier lets us register it on the event
+ * loop and access the associated shadow virtqueue easily. If we used the
+ * VirtQueue, we wouldn't have an easy way to retrieve it.
+ *
+ * So the shadow virtqueue must not clean it up, or we would lose the
+ * VirtQueue's one.
+ */
+EventNotifier svq_kick;
   } VhostShadowVirtqueue;

+#define INVALID_SVQ_KICK_FD -1
+
   /**
* The notifier that SVQ will use to notify the device.
*/
@@ -29,6 +42,35 @@ const EventNotifier *vhost_svq_get_dev_kick_notifier(
    return &svq->hdev_kick;
   }

+/**
+ * Set a new file descriptor for the guest to kick SVQ and notify for avail
+ *
+ * @svq  The svq
+ * @svq_kick_fd  The new svq kick fd
+ */
+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
+{
+EventNotifier tmp;
+bool check_old = INVALID_SVQ_KICK_FD !=
+ event_notifier_get_fd(&svq->svq_kick);
+
+if (check_old) {
+event_notifier_set_handler(&svq->svq_kick, NULL);
+event_notifier_init_fd(&tmp, event_notifier_get_fd(&svq->svq_kick));
+}


It looks to me we don't do similar things in vhost-net. Any reason for
caring about the old svq_kick?


Do you mean to check for old kick_fd in case we miss notifications,
and explicitly omit the INVALID_SVQ_KICK_FD?



Yes.




If you mean qemu's vhost-net, I guess it's because the device's kick
fd is never changed in all the vhost device lifecycle, it's only set
at the beginning. Previous RFC also depended on that, but you
suggested better vhost and SVQ in v4 feedback if I understood
correctly [1]. Or am I missing something?



No, I forgot that. But in this case we should deal better with the
conversion from a valid fd to -1, by disabling the handler.





Qemu's vhost-net does not need to use this because it is not polling
it. For kernel's vhost, I guess the closest is the use of pollstop and
pollstart at vhost_vring_ioctl.

In my opinion, SVQ's code size can benefit from only allowing kick_fd
to be overridden at the start of the operation. Not at initialization,
but at start. But I can see the benefits of taking the change into
account from this moment, so it's more resilient to the future.


+
+/*
+ * event_notifier_set_handler already checks for guest's notifications if
+ * they arrive at the new file descriptor in the switch, so there is no
+ * need to explicitly check for them.
+ */
+event_notifier_init_fd(&svq->svq_kick, svq_kick_fd);
+
+if (!check_old || event_notifier_test_and_clear(&tmp)) {
+event_notifier_set(&svq->hdev_kick);


Any reason we need to kick the device directly here?


At this point of the series only notifications are forwarded, not
buffers. If kick_fd is set, we need to check the old one, the same way
as vhost checks the masked notifier in case of change.



I meant we need to kick the svq instead of vhost-vdpa in this case?

Thanks




Thanks!

[1] https://lists.gnu.org/archive/html/qemu-devel/2021-10/msg03152.html
, from "I'd suggest to not depend on this since it:"



Thanks



+}
+}
+
   /**
* Creates vhost shadow virtqueue, and instruct vhost device to use the 
shadow
* methods and file descriptors.
@@ -52,6 +94,9 @@ VhostShadowVirtqueue *vhost_svq_new(void)
   goto err_init_hdev_call;
   }

+/* Placeholder descriptor, it should be deleted at set_kick_fd */
+event_notifier_init_fd(&svq->svq_kick, 

Re: [PATCH 00/31] vDPA shadow virtqueue

2022-02-08 Thread Jason Wang


在 2022/1/31 下午5:15, Eugenio Perez Martin 写道:

On Fri, Jan 28, 2022 at 7:02 AM Jason Wang  wrote:


在 2022/1/22 上午4:27, Eugenio Pérez 写道:

This series enables shadow virtqueue (SVQ) for vhost-vdpa devices. This
is intended as a new method of tracking the memory the devices touch
during a migration process: instead of relying on the vhost device's
dirty logging capability, SVQ intercepts the VQ dataplane, forwarding
the descriptors between VM and device. This way qemu is the effective
writer of the guest's memory, like in qemu's emulated virtio device
operation.

When SVQ is enabled qemu offers a new virtual address space to the
device to read and write into, and it maps new vrings and the guest
memory in it. SVQ also intercepts kicks and calls between the device
and the guest. Relaying used buffers causes the dirty memory to be
tracked, but at this RFC stage SVQ is not enabled on migration
automatically.

Thanks to being a buffer relay system, SVQ can also be used to
connect devices and drivers with different capabilities, like
devices that only support packed vrings and not split ones with old
guests that have no driver packed support.

It is based on the ideas of DPDK SW-assisted LM, in DPDK's series
https://patchwork.dpdk.org/cover/48370/ . However, that series does
not map the shadow vq in the guest's VA, but in qemu's.

This version of SVQ is limited in the features it can use with the
guest and the device, because this series is already very big otherwise.
Features like indirect or event_idx will be addressed in future series.

SVQ needs to be enabled with cmdline parameter x-svq, like:

-netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0,x-svq=true

In this version it cannot be enabled or disabled at runtime. Further
series will remove this limitation and will enable it only for the
migration time.

Some patches are intentionally very small to ease review, but they can
be squashed if preferred.

Patches 1-10 prepare the SVQ and QEMU to support both guest to device
and device to guest notification forwarding, with the extra qemu hop.
That part can be tested in isolation if the cmdline change is applied.

Patches from 11 to 18 implement the actual buffer forwarding, but with
no IOMMU support. This requires a vdpa device capable of addressing all
of qemu's vaddr.

Patches 19 to 23 add the IOMMU support, so devices with address
range limitations can access SVQ through the new virtual address space
created.

The rest of the series adds the last pieces needed for migration.

Comments are welcome.


I wonder about the performance impact. So performance numbers are more than
welcome.


Sure, I'll do it for the next revision. Since this one brings a decent
amount of changes, I chose to collect the feedback first.



A simple single TCP_STREAM netperf test should be sufficient to give 
some basic understanding about the performance impact.
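For example, something like "netperf -H $GUEST_IP -t TCP_STREAM -l 30", run
once with x-svq=true and once without, would give a first-order comparison
(an illustrative invocation; the target address and duration are
placeholders).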


Thanks




Thanks!


Thanks



TODO:
* Event, indirect, packed, and other features of virtio.
* Separate buffer forwarding into its own AIO context, so we can
throw more threads at that task and we don't need to stop the main
event loop.
* Support virtio-net control vq.
* Proper documentation.

Changes from v5 RFC:
* Remove dynamic enablement of SVQ, making it less dependent on the device.
* Enable live migration if SVQ is enabled.
* Fix SVQ when the driver resets.
* Comments addressed, especially in the iova area.
* Rebase on latest master, adding multiqueue support (but no networking
control vq processing).
v5 link:
https://lists.gnu.org/archive/html/qemu-devel/2021-10/msg07250.html

Changes from v4 RFC:
* Support of allocating / freeing iova ranges in IOVA tree. Extending
already present iova-tree for that.
* Proper validation of guest features. Now SVQ can negotiate a
different set of features with the device when enabled.
* Support of host notifiers memory regions
* Handling of SVQ full queue in case the guest's descriptors span across
different memory regions (qemu's VA chunks).
* Flush pending used buffers at end of SVQ operation.
* QMP command now looks up by NetClientState name. Other devices will need
to implement their own way to enable vdpa.
* Rename QMP command to set, so it looks more like a way of working
* Better use of qemu error system
* Turn a few assertions into proper error-handling paths.
* Add more documentation
* Less coupling of virtio / vhost, which could cause friction on changes
* Addressed many other small comments and small fixes.

Changes from v3 RFC:
* Move everything to vhost-vdpa backend. A big change, this allowed
  some cleanup but more code has been added in other places.
* More use of glib utilities, especially to manage memory.
v3 link:
https://lists.nongnu.org/archive/html/qemu-devel/2021-05/msg06032.html

Changes from v2 RFC:
* Adding vhost-vdpa devices support
* Fixed some memory leaks pointed by different comments
v2 link:
https://lists.nongnu.org/archive/html/qemu-devel/2021-03/msg05600.html

Changes from v1 RFC:
* Use QMP 

Re: [PATCH 28/31] vdpa: Expose VHOST_F_LOG_ALL on SVQ

2022-02-08 Thread Jason Wang


在 2022/2/1 下午7:45, Eugenio Perez Martin 写道:

On Sun, Jan 30, 2022 at 7:50 AM Jason Wang  wrote:


在 2022/1/22 上午4:27, Eugenio Pérez 写道:

SVQ is able to log the dirty bits by itself, so let's use it to not
block migration.

Also, ignore set and clear of VHOST_F_LOG_ALL on set_features if SVQ is
enabled. Even if the device supports it, the reports would be nonsense
because SVQ memory is in the qemu region.

The log region is still allocated. Future changes might skip that, but
this series is already long enough.
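"Log by itself" can be pictured with a small sketch (a hypothetical helper,
not this patch's code): since qemu writes the guest's memory when it relays
used buffers, it can set the corresponding bits in the dirty log directly,
without device support:

    #include <stdint.h>

    /* Mark [gpa, gpa + len) dirty in a bitmap with one bit per page, the
     * way a VHOST_F_LOG_ALL log region is laid out. page_size is assumed
     * to be a power of two. */
    static void svq_log_dirty(uint8_t *log, uint64_t gpa, uint64_t len,
                              uint64_t page_size)
    {
            for (uint64_t a = gpa & ~(page_size - 1); a < gpa + len;
                 a += page_size) {
                    uint64_t page = a / page_size;

                    log[page / 8] |= 1u << (page % 8);
            }
    }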

Signed-off-by: Eugenio Pérez 
---
   hw/virtio/vhost-vdpa.c | 20 
   1 file changed, 20 insertions(+)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index fb0a338baa..75090d65e8 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -1022,6 +1022,9 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev, 
uint64_t *features)
   if (ret == 0 && v->shadow_vqs_enabled) {
   /* Filter only features that SVQ can offer to guest */
   vhost_svq_valid_guest_features(features);
+
+/* Add SVQ logging capabilities */
+*features |= BIT_ULL(VHOST_F_LOG_ALL);
   }

   return ret;
@@ -1039,8 +1042,25 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev,

   if (v->shadow_vqs_enabled) {
   uint64_t dev_features, svq_features, acked_features;
+uint8_t status = 0;
   bool ok;

+ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &status);
+if (unlikely(ret)) {
+return ret;
+}
+
+if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
+/*
+ * vhost is trying to enable or disable _F_LOG, and the device
+ * would report wrong dirty pages. SVQ handles it.
+ */


I fail to understand this comment, I'd think there's no way to disable
dirty page tracking for SVQ.


vhost_log_global_{start,stop} are called at the beginning and end of
migration. To inform the device that it should start logging, they set
or clean VHOST_F_LOG_ALL at vhost_dev_set_log.



Yes, but for SVQ, we can't disable dirty page tracking, can we? The
only thing we can do is ignore or filter out the F_LOG_ALL and pretend
it is enabled or disabled.





While SVQ does not use VHOST_F_LOG_ALL, it exports the feature bit so
vhost does not block migration. Maybe we need to look for another way
to do this?



I'm fine with filtering since it's much simpler, but I fail to
understand why we need to check DRIVER_OK.


Thanks




Thanks!


Thanks



+return 0;
+}
+
+/* We must not ack _F_LOG if SVQ is enabled */
+features &= ~BIT_ULL(VHOST_F_LOG_ALL);
+
+ret = vhost_vdpa_get_dev_features(dev, &dev_features);
   if (ret != 0) {
   error_report("Can't get vdpa device features, got (%d)", ret);



Re: [PATCH 23/31] vdpa: Add custom IOTLB translations to SVQ

2022-02-08 Thread Jason Wang


在 2022/2/1 上午3:11, Eugenio Perez Martin 写道:

+return false;
+}
+
+/*
+ * Map->iova chunk size is ignored. What to do if descriptor
+ * (addr, size) does not fit is delegated to the device.
+ */

I think we need to at least check the size and fail if the size doesn't
match here. Or is it possible that we have a buffer that may cross two
memory regions?


It should be impossible, since both iova_tree and VirtQueue should be
in sync regarding the memory region updates. If a VirtQueue buffer
crosses many memory regions, the iovec has more entries.

I can add a return false, but I'm not able to trigger that situation
even with a malformed driver.



Ok, but it won't harm to add a warn here.
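The suggested warn could look roughly like this (a hypothetical sketch built
around qemu's DMAMap type, whose size field is inclusive, and warn_report()
from qemu/error-report.h; the real check belongs in SVQ's translation loop):

    /* Return false (and warn) when [addr, addr + len) is not fully covered
     * by the map that the tree lookup returned. */
    static bool svq_map_covers(const DMAMap *map, hwaddr addr, uint64_t len)
    {
            hwaddr off = addr - map->translated_addr;

            if (len == 0 || len - 1 > map->size - off) {
                    warn_report("SVQ: buffer 0x%" PRIx64 "/%" PRIu64
                                " crosses a memory region boundary",
                                addr, len);
                    return false;
            }
            return true;
    }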

Thanks


Re: [PATCH 22/31] vhost: Add VhostIOVATree

2022-02-08 Thread Jason Wang


在 2022/2/2 上午1:27, Eugenio Perez Martin 写道:

On Sun, Jan 30, 2022 at 6:21 AM Jason Wang  wrote:


在 2022/1/22 上午4:27, Eugenio Pérez 写道:

This tree is able to look for a translated address from an IOVA address.

At first glance it is similar to util/iova-tree. However, SVQ working on
devices with limited IOVA space needs more capabilities,


So did the IOVA tree (e.g l2 vtd can only work in the range of GAW and
without RMRRs).



   like allocating
IOVA chunks or performing reverse translations (qemu addresses to iova).


This looks like a general request as well. So I wonder if we can simply
extend iova tree instead.


While both are true, I don't see code that performs allocations or
qemu vaddr to iova translations. But if the changes can be integrated
into iova-tree that would be great for sure.

The main drawback I see is the need to maintain two trees instead of
one for users of iova-tree. While complexity does not grow, it doubles
the amount of work needed.



If you care about the performance, we can disable the reverse mapping
during the allocation. For vIOMMU users there won't be any noticeable
performance penalty.


Thanks




Thanks!


Thanks



The allocation capability, as "assign a free IOVA address to this chunk
of memory in qemu's address space" allows shadow virtqueue to create a
new address space that is not restricted by guest's addressable one, so
we can allocate shadow vqs vrings outside of it.

It duplicates the tree so it can search efficiently in both directions,
and it will signal an overlap if the iova or the translated address is
present in either tree.
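As a usage sketch of the API this patch introduces (illustrative only;
DMAMap semantics are assumed from util/iova-tree, where size is the
inclusive length minus one, and vhost_iova_tree_map_alloc() is assumed to
return 0 on success, filling map.iova):

    /* Allocate an IOVA range for a chunk of qemu memory, then find it
     * again from its qemu vaddr. Assumes the qemu iova-tree and this
     * patch's headers are included. */
    static void vhost_iova_tree_example(VhostIOVATree *tree, void *buf,
                                        size_t len)
    {
            DMAMap map = {
                    .translated_addr = (uint64_t)(uintptr_t)buf, /* vaddr */
                    .size = len - 1,                             /* inclusive */
                    .perm = IOMMU_RW,
            };

            if (vhost_iova_tree_map_alloc(tree, &map) == 0) {
                    const DMAMap *found =
                            vhost_iova_tree_find_iova(tree, &map);

                    assert(found && found->iova == map.iova);
            }
    }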

Signed-off-by: Eugenio Pérez 
---
   hw/virtio/vhost-iova-tree.h |  27 +++
   hw/virtio/vhost-iova-tree.c | 157 
   hw/virtio/meson.build   |   2 +-
   3 files changed, 185 insertions(+), 1 deletion(-)
   create mode 100644 hw/virtio/vhost-iova-tree.h
   create mode 100644 hw/virtio/vhost-iova-tree.c

diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
new file mode 100644
index 00..610394eaf1
--- /dev/null
+++ b/hw/virtio/vhost-iova-tree.h
@@ -0,0 +1,27 @@
+/*
+ * vhost software live migration ring
+ *
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
+ * SPDX-FileContributor: Author: Eugenio Pérez 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VIRTIO_VHOST_IOVA_TREE_H
+#define HW_VIRTIO_VHOST_IOVA_TREE_H
+
+#include "qemu/iova-tree.h"
+#include "exec/memory.h"
+
+typedef struct VhostIOVATree VhostIOVATree;
+
+VhostIOVATree *vhost_iova_tree_new(uint64_t iova_first, uint64_t iova_last);
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree);
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete);
+
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
+const DMAMap *map);
+int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map);
+
+#endif
diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
new file mode 100644
index 00..0021dbaf54
--- /dev/null
+++ b/hw/virtio/vhost-iova-tree.c
@@ -0,0 +1,157 @@
+/*
+ * vhost software live migration ring
+ *
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
+ * SPDX-FileContributor: Author: Eugenio Pérez 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/iova-tree.h"
+#include "vhost-iova-tree.h"
+
+#define iova_min_addr qemu_real_host_page_size
+
+/**
+ * VhostIOVATree, able to:
+ * - Translate iova address
+ * - Reverse translate iova address (from translated to iova)
+ * - Allocate IOVA regions for translated range (potentially slow operation)
+ *
+ * Note that it cannot remove nodes.
+ */
+struct VhostIOVATree {
+/* First addressable iova address in the device */
+uint64_t iova_first;
+
+/* Last addressable iova address in the device */
+uint64_t iova_last;
+
+/* IOVA address to qemu memory maps. */
+IOVATree *iova_taddr_map;
+
+/* QEMU virtual memory address to iova maps */
+GTree *taddr_iova_map;
+};
+
+static gint vhost_iova_tree_cmp_taddr(gconstpointer a, gconstpointer b,
+  gpointer data)
+{
+const DMAMap *m1 = a, *m2 = b;
+
+if (m1->translated_addr > m2->translated_addr + m2->size) {
+return 1;
+}
+
+if (m1->translated_addr + m1->size < m2->translated_addr) {
+return -1;
+}
+
+/* Overlapped */
+return 0;
+}
+
+/**
+ * Create a new IOVA tree
+ *
+ * Returns the new IOVA tree
+ */
+VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
+{
+VhostIOVATree *tree = g_new(VhostIOVATree, 1);
+
+/* Some devices do not like 0 addresses */
+tree->iova_first = MAX(iova_first, iova_min_addr);
+tree->iova_last = iova_last;
+
+tree->iova_taddr_map = iova_tree_new();
+tree->taddr_iova_map = g_tree_new_full(vhost_iova_tree_cmp_taddr, NULL,
+ 

Re: [PATCH 18/31] vhost: Shadow virtqueue buffers forwarding

2022-02-08 Thread Jason Wang


在 2022/2/1 下午7:25, Eugenio Perez Martin 写道:

On Sun, Jan 30, 2022 at 7:47 AM Jason Wang  wrote:


在 2022/1/22 上午4:27, Eugenio Pérez 写道:

@@ -272,6 +590,28 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
   void vhost_svq_stop(VhostShadowVirtqueue *svq)
   {
    event_notifier_set_handler(&svq->svq_kick, NULL);
+g_autofree VirtQueueElement *next_avail_elem = NULL;
+
+if (!svq->vq) {
+return;
+}
+
+/* Send all pending used descriptors to guest */
+vhost_svq_flush(svq, false);


Do we need to wait for all the pending descriptors to be completed here?


No, this function does not wait; it only completes the forwarding of
the *used* descriptors.

The best example is the net rx queue in my opinion. This call will
check SVQ's vring used_idx and will forward the last used descriptors
if any, but all available descriptors will remain as available for
qemu's VQ code.

Skipping it would miss those last rx descriptors in migration.

Thanks!



So it's probably not the best place to ask. It's more about the
inflight descriptors, so it should be TX instead of RX.

I can imagine that in the last phase of migration we should stop the
vhost-vDPA before calling vhost_svq_stop(). Then we should be fine
regardless of inflight descriptors.
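In code, the ordering being suggested would look roughly like this (a sketch
reusing the helpers shown earlier in this series; error handling elided):

    static void vhost_vdpa_svq_teardown(struct vhost_dev *dev)
    {
            struct vhost_vdpa *v = dev->opaque;

            /* Quiesce the device first, so it stops writing used
             * descriptors and no buffer stays in flight... */
            vhost_vdpa_reset_device(dev);

            /* ...then let SVQ return everything pending to the guest. */
            for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
                    vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));
            }
    }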


Thanks





Thanks



+
+for (unsigned i = 0; i < svq->vring.num; ++i) {
+g_autofree VirtQueueElement *elem = NULL;
+elem = g_steal_pointer(&svq->ring_id_maps[i]);
+if (elem) {
+virtqueue_detach_element(svq->vq, elem, elem->len);
+}
+}
+
+next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
+if (next_avail_elem) {
+virtqueue_detach_element(svq->vq, next_avail_elem,
+ next_avail_elem->len);
+}
   }



Re: [PATCH 18/31] vhost: Shadow virtqueue buffers forwarding

2022-02-08 Thread Jason Wang


在 2022/2/2 上午1:08, Eugenio Perez Martin 写道:

On Sun, Jan 30, 2022 at 5:43 AM Jason Wang  wrote:


在 2022/1/22 上午4:27, Eugenio Pérez 写道:

Initial version of shadow virtqueue that actually forward buffers. There
is no iommu support at the moment, and that will be addressed in future
patches of this series. Since all vhost-vdpa devices use forced IOMMU,
this means that SVQ is not usable at this point of the series on any
device.

For simplicity it only supports modern devices, which expect the vring
in little endian, with a split ring and no event idx or indirect
descriptors. Support for them will not be added in this series.

It reuses the VirtQueue code for the device part. The driver part is
based on Linux's virtio_ring driver, but with stripped functionality
and optimizations so it's easier to review.

However, forwarding buffers has some particular pieces: one of the most
unexpected ones is that a guest's buffer can expand across more than
one descriptor in SVQ. While this is handled gracefully by qemu's
emulated virtio devices, it may cause unexpected SVQ queue full. This
patch also solves it by checking for this condition at both guest's
kicks and device's calls. The code may be more elegant in the future if
SVQ code runs in its own iocontext.
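The queue-full handling described above can be condensed into a sketch
(names follow this patch's structures; this is an illustrative helper, not
the patch's code):

    /* A guest element consumes out_num + in_num SVQ descriptors; if they
     * do not fit, park it in next_guest_avail_elem and stop processing.
     * It is retried once the device returns used buffers. */
    static bool vhost_svq_try_add(VhostShadowVirtqueue *svq,
                                  VirtQueueElement *elem)
    {
            if (elem->out_num + elem->in_num >
                vhost_svq_available_slots(svq)) {
                    svq->next_guest_avail_elem = elem;
                    return false;
            }

            /* ... expose the element on the shadow vring and kick ... */
            return true;
    }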

Signed-off-by: Eugenio Pérez 
---
   hw/virtio/vhost-shadow-virtqueue.h |   2 +
   hw/virtio/vhost-shadow-virtqueue.c | 365 -
   hw/virtio/vhost-vdpa.c | 111 -
   3 files changed, 462 insertions(+), 16 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index 39aef5ffdf..19c934af49 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -33,6 +33,8 @@ uint16_t vhost_svq_get_num(const VhostShadowVirtqueue *svq);
   size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
   size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);

+void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
+ VirtQueue *vq);
   void vhost_svq_stop(VhostShadowVirtqueue *svq);

   VhostShadowVirtqueue *vhost_svq_new(uint16_t qsize);
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index 7c168075d7..a1a404f68f 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -9,6 +9,8 @@

   #include "qemu/osdep.h"
   #include "hw/virtio/vhost-shadow-virtqueue.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/virtio-access.h"
   #include "standard-headers/linux/vhost_types.h"

   #include "qemu/error-report.h"
@@ -36,6 +38,33 @@ typedef struct VhostShadowVirtqueue {

   /* Guest's call notifier, where SVQ calls guest. */
   EventNotifier svq_call;
+
+/* Virtio queue shadowing */
+VirtQueue *vq;
+
+/* Virtio device */
+VirtIODevice *vdev;
+
+/* Map for returning guest's descriptors */
+VirtQueueElement **ring_id_maps;
+
+/* Next VirtQueue element that guest made available */
+VirtQueueElement *next_guest_avail_elem;
+
+/* Next head to expose to device */
+uint16_t avail_idx_shadow;
+
+/* Next free descriptor */
+uint16_t free_head;
+
+/* Last seen used idx */
+uint16_t shadow_used_idx;
+
+/* Next head to consume from device */
+uint16_t last_used_idx;
+
+/* Cache for the exposed notification flag */
+bool notification;
   } VhostShadowVirtqueue;

   #define INVALID_SVQ_KICK_FD -1
@@ -148,30 +177,294 @@ bool vhost_svq_ack_guest_features(uint64_t dev_features,
   return true;
   }

-/* Forward guest notifications */
-static void vhost_handle_guest_kick(EventNotifier *n)
+/**
+ * Number of descriptors that SVQ can make available from the guest.
+ *
+ * @svq   The svq
+ */
+static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
   {
-VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
- svq_kick);
+return svq->vring.num - (svq->avail_idx_shadow - svq->shadow_used_idx);
+}
+
+static void vhost_svq_set_notification(VhostShadowVirtqueue *svq, bool enable)
+{
+uint16_t notification_flag;

-if (unlikely(!event_notifier_test_and_clear(n))) {
+if (svq->notification == enable) {
+return;
+}
+
+notification_flag = cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
+
+svq->notification = enable;
+if (enable) {
+svq->vring.avail->flags &= ~notification_flag;
+} else {
+svq->vring.avail->flags |= notification_flag;
+}
+}
+
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
+const struct iovec *iovec,
+size_t num, bool more_descs, bool write)
+{
+uint16_t i = svq->free_head, last = svq->free_head;
+unsigned n;
+uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
+vring_desc_t *descs = svq->vring.desc;
+
+if (num == 0) {
+