Re: [PATCH v5 3/6] mm: make alloc_contig_range work at pageblock granularity

2022-02-13 Thread Christoph Hellwig
> +int
> +isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, int 
> isolate_before_boundary);

Please avoid the completely unreadably long line. i.e.

int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags,
int isolate_before_boundary);

Same in various other spots.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH V4 4/4] vDPA/ifcvf: implement shared IRQ feature

2022-02-13 Thread Jason Wang


在 2022/2/3 下午3:27, Zhu Lingshan 写道:

On some platforms/devices, there may not be enough MSI vector
slots allocated for virtqueues and config changes. In such a case,
the interrupt sources(virtqueues, config changes) must share
an IRQ/vector, to avoid initialization failures, keep
the device functional.

This commit handles three cases:
(1) number of the allocated vectors == the number of virtqueues + 1
(config changes), every virtqueue and the config interrupt has
a separated vector/IRQ, the best and the most likely case.
(2) number of the allocated vectors is less than the best case, but
greater than 1. In this case, all virtqueues share a vector/IRQ,
the config interrupt has a separated vector/IRQ
(3) only one vector is allocated, in this case, the virtqueues and
the config interrupt share a vector/IRQ. The worst and most
unlikely case.

Otherwise, it needs to fail.

This commit introduces some helper functions:
ifcvf_set_vq_vector() and ifcvf_set_config_vector() sets virtqueue
vector and config vector in the device config space, so that
the device can send interrupt DMA.

This commit adds some fields in struct ifcvf_hw and re-placed
the existed fields to be aligned with the cacheline.

Signed-off-by: Zhu Lingshan 
---
  drivers/vdpa/ifcvf/ifcvf_base.c |  47 --
  drivers/vdpa/ifcvf/ifcvf_base.h |  23 ++-
  drivers/vdpa/ifcvf/ifcvf_main.c | 243 +++-
  3 files changed, 256 insertions(+), 57 deletions(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c
index 397692ae671c..18dcb63ab1e3 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.c
+++ b/drivers/vdpa/ifcvf/ifcvf_base.c
@@ -15,6 +15,36 @@ struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw)
return container_of(hw, struct ifcvf_adapter, vf);
  }
  
+int ifcvf_set_vq_vector(struct ifcvf_hw *hw, u16 qid, int vector)

+{
+   struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
+   struct ifcvf_adapter *ifcvf = vf_to_adapter(hw);
+
+   ifc_iowrite16(qid, >queue_select);
+   ifc_iowrite16(vector, >queue_msix_vector);
+   if (ifc_ioread16(>queue_msix_vector) == VIRTIO_MSI_NO_VECTOR) {
+   IFCVF_ERR(ifcvf->pdev, "No msix vector for queue %u\n", qid);
+   return -EINVAL;
+   }



Let's leave this check for the caller, E.g can caller try to assign 
NO_VECTOR during uni-nit?




+
+   return 0;
+}
+
+int ifcvf_set_config_vector(struct ifcvf_hw *hw, int vector)
+{
+   struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
+   struct ifcvf_adapter *ifcvf = vf_to_adapter(hw);
+
+   cfg = hw->common_cfg;
+   ifc_iowrite16(vector,  >msix_config);
+   if (ifc_ioread16(>msix_config) == VIRTIO_MSI_NO_VECTOR) {
+   IFCVF_ERR(ifcvf->pdev, "No msix vector for device config\n");
+   return -EINVAL;
+   }



Similar question as above.



+
+   return 0;
+}
+
  static void __iomem *get_cap_addr(struct ifcvf_hw *hw,
  struct virtio_pci_cap *cap)
  {
@@ -140,6 +170,8 @@ int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev)
  hw->common_cfg, hw->notify_base, hw->isr,
  hw->dev_cfg, hw->notify_off_multiplier);
  
+	hw->vqs_shared_irq = -EINVAL;

+
return 0;
  }
  
@@ -321,12 +353,6 @@ static int ifcvf_hw_enable(struct ifcvf_hw *hw)
  
  	ifcvf = vf_to_adapter(hw);

cfg = hw->common_cfg;
-   ifc_iowrite16(IFCVF_MSI_CONFIG_OFF, >msix_config);
-
-   if (ifc_ioread16(>msix_config) == VIRTIO_MSI_NO_VECTOR) {
-   IFCVF_ERR(ifcvf->pdev, "No msix vector for device config\n");
-   return -EINVAL;
-   }
  
  	for (i = 0; i < hw->nr_vring; i++) {

if (!hw->vring[i].ready)
@@ -340,15 +366,6 @@ static int ifcvf_hw_enable(struct ifcvf_hw *hw)
ifc_iowrite64_twopart(hw->vring[i].used, >queue_used_lo,
 >queue_used_hi);
ifc_iowrite16(hw->vring[i].size, >queue_size);
-   ifc_iowrite16(i + IFCVF_MSI_QUEUE_OFF, >queue_msix_vector);
-
-   if (ifc_ioread16(>queue_msix_vector) ==
-   VIRTIO_MSI_NO_VECTOR) {
-   IFCVF_ERR(ifcvf->pdev,
- "No msix vector for queue %u\n", i);
-   return -EINVAL;
-   }
-
ifcvf_set_vq_state(hw, i, hw->vring[i].last_avail_idx);
ifc_iowrite16(1, >queue_enable);
}
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
index 949b4fb9d554..9cfe088c82e9 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.h
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -27,8 +27,6 @@
  
  #define IFCVF_QUEUE_ALIGNMENT	PAGE_SIZE

  #define IFCVF_QUEUE_MAX   32768
-#define IFCVF_MSI_CONFIG_OFF   0
-#define IFCVF_MSI_QUEUE_OFF1
  #define IFCVF_PCI_MAX_RESOURCE6
  
  #define IFCVF_LM_CFG_SIZE		0x40

@@ -42,6 +40,13 @@

Re: [PATCH V4 3/4] vhost_vdpa: don't setup irq offloading when irq_num < 0

2022-02-13 Thread Jason Wang


在 2022/2/3 下午3:27, Zhu Lingshan 写道:

When irq number is negative(e.g., -EINVAL), the virtqueue
may be disabled or the virtqueues are sharing a device irq.
In such case, we should not setup irq offloading for a virtqueue.

Signed-off-by: Zhu Lingshan 
---
  drivers/vhost/vdpa.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 851539807bc9..c4fcacb0de3a 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -96,6 +96,10 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, 
u16 qid)
if (!ops->get_vq_irq)
return;
  
+	irq = ops->get_vq_irq(vdpa, qid);

+   if (irq < 0)
+   return;
+
irq = ops->get_vq_irq(vdpa, qid);



So the get_vq_irq() will be called twice?



irq_bypass_unregister_producer(>call_ctx.producer);
if (!vq->call_ctx.ctx || irq < 0)



We're already checked irq against 0 here.

Thanks


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH V4 2/4] vDPA/ifcvf: implement device MSIX vector allocator

2022-02-13 Thread Jason Wang


在 2022/2/3 下午3:27, Zhu Lingshan 写道:

This commit implements a MSIX vector allocation helper
for vqs and config interrupts.

Signed-off-by: Zhu Lingshan 
---
  drivers/vdpa/ifcvf/ifcvf_main.c | 35 +++--
  1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index d1a6b5ab543c..44c89ab0b6da 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -58,14 +58,45 @@ static void ifcvf_free_irq(struct ifcvf_adapter *adapter, 
int queues)
ifcvf_free_irq_vectors(pdev);
  }
  
+/* ifcvf MSIX vectors allocator, this helper tries to allocate

+ * vectors for all virtqueues and the config interrupt.
+ * It returns the number of allocated vectors, negative
+ * return value when fails.
+ */
+static int ifcvf_alloc_vectors(struct ifcvf_adapter *adapter)
+{
+   struct pci_dev *pdev = adapter->pdev;
+   struct ifcvf_hw *vf = >vf;
+   int max_intr, ret;
+
+   /* all queues and config interrupt  */
+   max_intr = vf->nr_vring + 1;
+   ret = pci_alloc_irq_vectors(pdev, 1, max_intr, PCI_IRQ_MSIX | 
PCI_IRQ_AFFINITY);
+
+   if (ret < 0) {
+   IFCVF_ERR(pdev, "Failed to alloc IRQ vectors\n");
+   return ret;
+   }
+
+   if (ret < max_intr)
+   IFCVF_INFO(pdev,
+  "Requested %u vectors, however only %u allocated, lower 
performance\n",
+  max_intr, ret);
+
+   return ret;
+}
+
  static int ifcvf_request_irq(struct ifcvf_adapter *adapter)
  {
struct pci_dev *pdev = adapter->pdev;
struct ifcvf_hw *vf = >vf;
-   int vector, i, ret, irq;
+   int vector, nvectors, i, ret, irq;
u16 max_intr;
  
-	/* all queues and config interrupt  */

+   nvectors = ifcvf_alloc_vectors(adapter);
+   if (!(nvectors > 0))
+   return nvectors;



Why not simply checking by using nvectors <= 0? If ifcvf_alloc_vectors 
can return 0 this breaks the ifcvf_request_irq() caller's assumption.




+
max_intr = vf->nr_vring + 1;
  
  	ret = pci_alloc_irq_vectors(pdev, max_intr,



So irq is allocated twice here?

Thanks


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH V4 1/4] vDPA/ifcvf: implement IO read/write helpers in the header file

2022-02-13 Thread Jason Wang


在 2022/2/3 下午3:27, Zhu Lingshan 写道:

re-implement IO read/write helpers in the header file, so that
they can be utilized among modules.

Signed-off-by: Zhu Lingshan 



I wonder if we can simply use include/linux/virtio_pci_modern.h.

The accessors vp_ioreadX/writeX() there were decoupled from the 
virtio_pci_modern_device structure.


Thanks



---
  drivers/vdpa/ifcvf/ifcvf_base.c | 36 
  drivers/vdpa/ifcvf/ifcvf_base.h | 37 +
  2 files changed, 37 insertions(+), 36 deletions(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c
index 7d41dfe48ade..397692ae671c 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.c
+++ b/drivers/vdpa/ifcvf/ifcvf_base.c
@@ -10,42 +10,6 @@
  
  #include "ifcvf_base.h"
  
-static inline u8 ifc_ioread8(u8 __iomem *addr)

-{
-   return ioread8(addr);
-}
-static inline u16 ifc_ioread16 (__le16 __iomem *addr)
-{
-   return ioread16(addr);
-}
-
-static inline u32 ifc_ioread32(__le32 __iomem *addr)
-{
-   return ioread32(addr);
-}
-
-static inline void ifc_iowrite8(u8 value, u8 __iomem *addr)
-{
-   iowrite8(value, addr);
-}
-
-static inline void ifc_iowrite16(u16 value, __le16 __iomem *addr)
-{
-   iowrite16(value, addr);
-}
-
-static inline void ifc_iowrite32(u32 value, __le32 __iomem *addr)
-{
-   iowrite32(value, addr);
-}
-
-static void ifc_iowrite64_twopart(u64 val,
- __le32 __iomem *lo, __le32 __iomem *hi)
-{
-   ifc_iowrite32((u32)val, lo);
-   ifc_iowrite32(val >> 32, hi);
-}
-
  struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw)
  {
return container_of(hw, struct ifcvf_adapter, vf);
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
index c486873f370a..949b4fb9d554 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.h
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -42,6 +42,43 @@
  #define ifcvf_private_to_vf(adapter) \
(&((struct ifcvf_adapter *)adapter)->vf)
  
+static inline u8 ifc_ioread8(u8 __iomem *addr)

+{
+   return ioread8(addr);
+}
+
+static inline u16 ifc_ioread16(__le16 __iomem *addr)
+{
+   return ioread16(addr);
+}
+
+static inline u32 ifc_ioread32(__le32 __iomem *addr)
+{
+   return ioread32(addr);
+}
+
+static inline void ifc_iowrite8(u8 value, u8 __iomem *addr)
+{
+   iowrite8(value, addr);
+}
+
+static inline void ifc_iowrite16(u16 value, __le16 __iomem *addr)
+{
+   iowrite16(value, addr);
+}
+
+static inline void ifc_iowrite32(u32 value, __le32 __iomem *addr)
+{
+   iowrite32(value, addr);
+}
+
+static inline void ifc_iowrite64_twopart(u64 val,
+ __le32 __iomem *lo, __le32 __iomem *hi)
+{
+   ifc_iowrite32((u32)val, lo);
+   ifc_iowrite32(val >> 32, hi);
+}
+
  struct vring_info {
u64 desc;
u64 avail;


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v4 14/14] virtio_pci: queue_reset: support VIRTIO_F_RING_RESET

2022-02-13 Thread Xuan Zhuo
On Fri, 11 Feb 2022 15:05:40 +0800, Jason Wang  wrote:
>
> 在 2022/2/9 下午8:29, Xuan Zhuo 写道:
> > This patch implements virtio pci support for QUEUE RESET.
> >
> > Performing reset on a queue is divided into these steps:
> >
> > 1. reset_vq: reset one vq
> > 2. recycle the buffer from vq by virtqueue_detach_unused_buf()
> > 3. release the ring of the vq by vring_release_virtqueue()
> > 4. enable_reset_vq: re-enable the reset queue
> >
> > This patch implements reset_vq, enable_reset_vq in the pci scenario
> >
> > Signed-off-by: Xuan Zhuo 
> > ---
> >   drivers/virtio/virtio_pci_common.c |  8 +--
> >   drivers/virtio/virtio_pci_modern.c | 80 --
> >   drivers/virtio/virtio_ring.c   |  2 +
> >   include/linux/virtio.h |  1 +
> >   4 files changed, 85 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_pci_common.c 
> > b/drivers/virtio/virtio_pci_common.c
> > index cb01eb0cb2e4..303637ac4914 100644
> > --- a/drivers/virtio/virtio_pci_common.c
> > +++ b/drivers/virtio/virtio_pci_common.c
> > @@ -255,9 +255,11 @@ static void vp_del_vq(struct virtqueue *vq)
> > struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
> > unsigned long flags;
> >
> > -   spin_lock_irqsave(_dev->lock, flags);
> > -   list_del(>node);
> > -   spin_unlock_irqrestore(_dev->lock, flags);
> > +   if (!vq->reset) {
> > +   spin_lock_irqsave(_dev->lock, flags);
> > +   list_del(>node);
> > +   spin_unlock_irqrestore(_dev->lock, flags);
> > +   }
> >
> > vp_dev->del_vq(info);
> > kfree(info);
> > diff --git a/drivers/virtio/virtio_pci_modern.c 
> > b/drivers/virtio/virtio_pci_modern.c
> > index d29d40bf0b45..cc45515eda50 100644
> > --- a/drivers/virtio/virtio_pci_modern.c
> > +++ b/drivers/virtio/virtio_pci_modern.c
> > @@ -34,6 +34,9 @@ static void vp_transport_features(struct virtio_device 
> > *vdev, u64 features)
> > if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) &&
> > pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV))
> > __virtio_set_bit(vdev, VIRTIO_F_SR_IOV);
> > +
> > +   if (features & BIT_ULL(VIRTIO_F_RING_RESET))
> > +   __virtio_set_bit(vdev, VIRTIO_F_RING_RESET);
> >   }
> >
> >   /* virtio config->finalize_features() implementation */
> > @@ -176,6 +179,70 @@ static void vp_reset(struct virtio_device *vdev)
> > vp_disable_cbs(vdev);
> >   }
> >
> > +static int vp_modern_reset_vq(struct virtqueue *vq)
> > +{
> > +   struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
> > +   struct virtio_pci_modern_device *mdev = _dev->mdev;
> > +   struct virtio_pci_vq_info *info;
> > +   unsigned long flags;
> > +   u16 msix_vec;
> > +
> > +   if (!virtio_has_feature(vq->vdev, VIRTIO_F_RING_RESET))
> > +   return -ENOENT;
> > +
> > +   vp_modern_set_queue_reset(mdev, vq->index);
> > +
> > +   info = vp_dev->vqs[vq->index];
> > +   msix_vec = info->msix_vector;
> > +
> > +   /* Disable VQ callback. */
> > +   if (vp_dev->per_vq_vectors && msix_vec != VIRTIO_MSI_NO_VECTOR)
> > +   disable_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec));
>
>
> I think we need a comment to explain why per_vq_mode needs to be dealt
> with differently.
>
>
> > +
> > +   /* delete vq */
> > +   spin_lock_irqsave(_dev->lock, flags);
> > +   list_del(>node);
> > +   spin_unlock_irqrestore(_dev->lock, flags);
>
>
> So I don't see where vring is freed and vp_setup_vq() may try to
> allocate new memory, won't it be a memory leak in this case?
>
> Thanks
>
>
> > +
> > +   vq->reset = true;
> > +
> > +   INIT_LIST_HEAD(>node);
> > +
> > +   return 0;
> > +}
> > +
> > +static int vp_modern_enable_reset_vq(struct virtqueue *vq, u16 ring_num)
> > +{
> > +   struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
> > +   struct virtio_pci_modern_device *mdev = _dev->mdev;
> > +   struct virtio_pci_vq_info *info;
> > +   struct virtqueue *_vq;
> > +   u16 msix_vec;
> > +
> > +   if (!vq->reset)
> > +   return -EPERM;
> > +
> > +   /* check queue reset status */
> > +   if (vp_modern_get_queue_reset(mdev, vq->index) != 1)
> > +   return -EBUSY;
> > +
> > +   info = vp_dev->vqs[vq->index];
> > +   _vq = vp_setup_vq(vq->vdev, vq->index, NULL, NULL, NULL,
> > +info->msix_vector, ring_num);
> > +   if (IS_ERR(_vq)) {
> > +   vq->reset = true;
> > +   return PTR_ERR(_vq);
> > +   }
> > +
> > +   vp_modern_set_queue_enable(_dev->mdev, vq->index, true);
> > +
> > +   msix_vec = vp_dev->vqs[vq->index]->msix_vector;
> > +   if (vp_dev->per_vq_vectors && msix_vec != VIRTIO_MSI_NO_VECTOR)
> > +   enable_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec));
> > +
> > +   return 0;
> > +}
> > +
> >   static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
> >   {
> > return vp_modern_config_vector(_dev->mdev, vector);
> > @@ -231,10 +298,13 @@ static struct virtqueue *setup_vq(struct 
> > virtio_pci_device *vp_dev,
> >

Re: [PATCH v3 2/4] drivers/net/virtio_net: Added basic RSS support.

2022-02-13 Thread Willem de Bruijn
> > > @@ -3113,13 +3270,14 @@ static int virtnet_probe(struct virtio_device 
> > > *vdev)
> > > u16 max_queue_pairs;
> > > int mtu;
> > >
> > > -   /* Find if host supports multiqueue virtio_net device */
> > > -   err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
> > > -  struct virtio_net_config,
> > > -  max_virtqueue_pairs, _queue_pairs);
> > > +   /* Find if host supports multiqueue/rss virtio_net device */
> > > +   max_queue_pairs = 1;
> > > +   if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || 
> > > virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
> > > +   max_queue_pairs =
> > > +virtio_cread16(vdev, offsetof(struct 
> > > virtio_net_config, max_virtqueue_pairs));
> >
> > Instead of testing either feature and treating them as somewhat equal,
> > shouldn't RSS be dependent on MQ?
>
> No, RSS is dependent on CTRL_VQ. Technically RSS and MQ are similar features.

RSS depends on having multiple queues.

What would enabling VIRTIO_NET_F_RSS without VIRTIO_NET_F_MQ do?

> >
> > >
> > > /* We need at least 2 queue's */
> > > -   if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
> > > +   if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
> > > max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
> > > !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
> > > max_queue_pairs = 1;
> > > @@ -3207,6 +3365,23 @@ static int virtnet_probe(struct virtio_device 
> > > *vdev)
> > > if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
> > > vi->mergeable_rx_bufs = true;
> > >
> > > +   if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
> > > +   vi->has_rss = true;
> > > +   vi->rss_indir_table_size =
> > > +   virtio_cread16(vdev, offsetof(struct 
> > > virtio_net_config,
> > > +   rss_max_indirection_table_length));
> > > +   vi->rss_key_size =
> > > +   virtio_cread8(vdev, offsetof(struct 
> > > virtio_net_config, rss_max_key_size));
> > > +
> > > +   vi->rss_hash_types_supported =
> > > +   virtio_cread32(vdev, offsetof(struct 
> > > virtio_net_config, supported_hash_types));
> > > +   vi->rss_hash_types_supported &=
> > > +   ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX |
> > > + VIRTIO_NET_RSS_HASH_TYPE_TCP_EX |
> > > + VIRTIO_NET_RSS_HASH_TYPE_UDP_EX);
> > > +
> > > +   dev->hw_features |= NETIF_F_RXHASH;
> >
> > Only make the feature visible when the hash is actually reported in
> > the skb, patch 3.
>
> VirtioNET has two features: RSS(steering only) and hash(hash report in
> vnet header)
> Both features may be enabled/disabled separately:
> 1. rss on and hash off - packets steered to the corresponding vqs
> 2. rss off and hash on - packets steered by tap(like mq) but headers
> have properly calculated hash.
> 3. rss on and hash on - packets steered to corresponding vqs and hash
> is present in the header.
>
> RXHASH feature allows the user to enable/disable the rss/hash(any 
> combination).

I find that confusing, but.. I see that there is prior art where some
drivers enable/disable entire RSS load balancing based on this flag.
So ok.

> I think it's a good idea to leave RXHASH in patch 2/4 to give the user
> ability to manipulate the rss only feature.
> But, if you think that it requires to move it to the 3/4, I'll do it.
>
> >
> > Also, clearly separate the feature patches (2) rss, (3) rxhash, (4)
> > rxhash config.
>
> Currently:
> Patch 2/4 - adds VirtioNet rss feature.
> Patch 3/4 - adds VirtioNet hash report feature.
> Patch 4/4 - adds the ability to manipulate supported hash types.
>
> Can you provide more detailed suggestions on how to move hunks?

I gave one in the follow-on patch, to which you responded. That's probably it.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v3 4/4] drivers/net/virtio_net: Added RSS hash report control.

2022-02-13 Thread Andrew Melnichenko
Hi all,


On Tue, Feb 8, 2022 at 10:59 PM Willem de Bruijn
 wrote:
>
> On Tue, Feb 8, 2022 at 1:19 PM Andrew Melnychenko  wrote:
> >
> > Now it's possible to control supported hashflows.
> > Added hashflow set/get callbacks.
> > Also, disabling RXH_IP_SRC/DST for TCP would disable then for UDP.
>
> I don't follow this comment. Can you elaborate?

I'll rephrase it in next version of patches.
The idea is that VirtioNet RSS doesn't distinguish IP hashes between
TCP and UDP.
For TCP and UDP it's possible to set IP+PORT hashes.
But disabling IP hashes will disable them for TCP and UDP simultaneously.
It's possible to set IP+PORT for TCP and IP for everything else(UDP, ICMP etc.)

>
> > TCP and UDP supports only:
> > ethtool -U eth0 rx-flow-hash tcp4 sd
> > RXH_IP_SRC + RXH_IP_DST
> > ethtool -U eth0 rx-flow-hash tcp4 sdfn
> > RXH_IP_SRC + RXH_IP_DST + RXH_L4_B_0_1 + RXH_L4_B_2_3
> >
> > Signed-off-by: Andrew Melnychenko 
> > ---
> >  drivers/net/virtio_net.c | 141 ++-
> >  1 file changed, 140 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 543da2fbdd2d..88759d5e693c 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -231,6 +231,7 @@ struct virtnet_info {
> > u8 rss_key_size;
> > u16 rss_indir_table_size;
> > u32 rss_hash_types_supported;
> > +   u32 rss_hash_types_saved;
>
> hash_types_active?

I think "hash_types_saved" is more suitable for the current field.
Idea is that the user may disable RSS/HASH and we need to save
what hash type configurations previously were enabled.
So, we can restore it when the user will enable RSS/HASH back.

>
> > +static bool virtnet_set_hashflow(struct virtnet_info *vi, struct 
> > ethtool_rxnfc *info)
> > +{
> > +   u32 new_hashtypes = vi->rss_hash_types_saved;
> > +   bool is_disable = info->data & RXH_DISCARD;
> > +   bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 
> > | RXH_L4_B_2_3);
> > +
> > +   /* supports only 'sd', 'sdfn' and 'r' */
> > +   if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | 
> > is_disable))
>
> maybe add an is_l3

There used to be "is_l3", but that variable was used only in that
condition statement.
So I've decided to inplace it.

>
> > +   return false;
> > +
> > +   switch (info->flow_type) {
> > +   case TCP_V4_FLOW:
> > +   new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | 
> > VIRTIO_NET_RSS_HASH_TYPE_TCPv4);
> > +   if (!is_disable)
> > +   new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
> > +   | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 
> > 0);
> > +   break;
> > +   case UDP_V4_FLOW:
> > +   new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | 
> > VIRTIO_NET_RSS_HASH_TYPE_UDPv4);
> > +   if (!is_disable)
> > +   new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4
> > +   | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 
> > 0);
> > +   break;
> > +   case IPV4_FLOW:
> > +   new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4;
> > +   if (!is_disable)
> > +   new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4;
> > +   break;
> > +   case TCP_V6_FLOW:
> > +   new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | 
> > VIRTIO_NET_RSS_HASH_TYPE_TCPv6);
> > +   if (!is_disable)
> > +   new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
> > +   | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 
> > 0);
> > +   break;
> > +   case UDP_V6_FLOW:
> > +   new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | 
> > VIRTIO_NET_RSS_HASH_TYPE_UDPv6);
> > +   if (!is_disable)
> > +   new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6
> > +   | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 
> > 0);
> > +   break;
> > +   case IPV6_FLOW:
> > +   new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6;
> > +   if (!is_disable)
> > +   new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6;
> > +   break;
> > +   default:
> > +   /* unsupported flow */
> > +   return false;
> > +   }
> > +
> > +   /* if unsupported hashtype was set */
> > +   if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported))
> > +   return false;
> > +
> > +   if (new_hashtypes != vi->rss_hash_types_saved) {
> > +   vi->rss_hash_types_saved = new_hashtypes;
>
> should only be updated if the commit function returned success?

Not really, we already made all checks against "supported" hash types.
Also, the commit function may not be called if RSS is disabled by the user.

Re: [PATCH v3 3/4] drivers/net/virtio_net: Added RSS hash report.

2022-02-13 Thread Andrew Melnichenko
Hi all,

On Tue, Feb 8, 2022 at 10:55 PM Willem de Bruijn
 wrote:
>
> On Tue, Feb 8, 2022 at 1:19 PM Andrew Melnychenko  wrote:
> >
> > Added features for RSS hash report.
> > If hash is provided - it sets to skb.
> > Added checks if rss and/or hash are enabled together.
> >
> > Signed-off-by: Andrew Melnychenko 
> > ---
> >  drivers/net/virtio_net.c | 51 ++--
> >  1 file changed, 44 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 495aed524e33..543da2fbdd2d 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -227,6 +227,7 @@ struct virtnet_info {
> >
> > /* Host supports rss and/or hash report */
> > bool has_rss;
> > +   bool has_rss_hash_report;
> > u8 rss_key_size;
> > u16 rss_indir_table_size;
> > u32 rss_hash_types_supported;
> > @@ -421,7 +422,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info 
> > *vi,
> >
> > hdr_len = vi->hdr_len;
> > if (vi->mergeable_rx_bufs)
> > -   hdr_padded_len = sizeof(*hdr);
> > +   hdr_padded_len = hdr_len;
>
> Belongs in patch 1?

Yeah, I'll move it.

>
> > else
> > hdr_padded_len = sizeof(struct padded_vnet_hdr);
> >
> > @@ -1156,6 +1157,8 @@ static void receive_buf(struct virtnet_info *vi, 
> > struct receive_queue *rq,
> > struct net_device *dev = vi->dev;
> > struct sk_buff *skb;
> > struct virtio_net_hdr_mrg_rxbuf *hdr;
> > +   struct virtio_net_hdr_v1_hash *hdr_hash;
> > +   enum pkt_hash_types rss_hash_type;
> >
> > if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
> > pr_debug("%s: short packet %i\n", dev->name, len);
> > @@ -1182,6 +1185,29 @@ static void receive_buf(struct virtnet_info *vi, 
> > struct receive_queue *rq,
> > return;
> >
> > hdr = skb_vnet_hdr(skb);
> > +   if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report) {
>
> Can the first be true if the second is not?

Yes, RSS may be enabled, but the hash report feature is disabled.
For now, it's possible to enable/disable VirtioNet RSS by manipulating RXHASH.

>
> > +   hdr_hash = (struct virtio_net_hdr_v1_hash *)(hdr);
> > +
> > +   switch (hdr_hash->hash_report) {
> > +   case VIRTIO_NET_HASH_REPORT_TCPv4:
> > +   case VIRTIO_NET_HASH_REPORT_UDPv4:
> > +   case VIRTIO_NET_HASH_REPORT_TCPv6:
> > +   case VIRTIO_NET_HASH_REPORT_UDPv6:
> > +   case VIRTIO_NET_HASH_REPORT_TCPv6_EX:
> > +   case VIRTIO_NET_HASH_REPORT_UDPv6_EX:
> > +   rss_hash_type = PKT_HASH_TYPE_L4;
> > +   break;
> > +   case VIRTIO_NET_HASH_REPORT_IPv4:
> > +   case VIRTIO_NET_HASH_REPORT_IPv6:
> > +   case VIRTIO_NET_HASH_REPORT_IPv6_EX:
> > +   rss_hash_type = PKT_HASH_TYPE_L3;
> > +   break;
> > +   case VIRTIO_NET_HASH_REPORT_NONE:
> > +   default:
> > +   rss_hash_type = PKT_HASH_TYPE_NONE;
> > +   }
> > +   skb_set_hash(skb, hdr_hash->hash_value, rss_hash_type);
> > +   }
>
> so many lines, perhaps deserves a helper function

Ok, I'll create the helper.

>
> >
> > if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
> > skb->ip_summed = CHECKSUM_UNNECESSARY;
> > @@ -2232,7 +2258,8 @@ static bool virtnet_commit_rss_command(struct 
> > virtnet_info *vi)
> > sg_set_buf([3], vi->ctrl->rss.key, sg_buf_size);
> >
> > if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
> > - VIRTIO_NET_CTRL_MQ_RSS_CONFIG, sgs)) {
> > + vi->has_rss ? 
> > VIRTIO_NET_CTRL_MQ_RSS_CONFIG
> > + : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) {
> > dev_warn(>dev, "VIRTIONET issue with committing RSS 
> > sgs\n");
> > return false;
> > }
> > @@ -3230,6 +3257,8 @@ static bool virtnet_validate_features(struct 
> > virtio_device *vdev)
> >  VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
> >  "VIRTIO_NET_F_CTRL_VQ") ||
> >  VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS,
> > +"VIRTIO_NET_F_CTRL_VQ") ||
> > +VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT,
> >  "VIRTIO_NET_F_CTRL_VQ"))) {
> > return false;
> > }
> > @@ -3365,8 +3394,13 @@ static int virtnet_probe(struct virtio_device *vdev)
> > if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
> > vi->mergeable_rx_bufs = true;
> >
> > -   if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
> > +   if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
> > +   

Re: [PATCH v3 2/4] drivers/net/virtio_net: Added basic RSS support.

2022-02-13 Thread Andrew Melnichenko
Hi all,

On Tue, Feb 8, 2022 at 10:37 PM Willem de Bruijn
 wrote:
>
> On Tue, Feb 8, 2022 at 1:19 PM Andrew Melnychenko  wrote:
> >
> > Added features for RSS.
> > Added initialization, RXHASH feature and ethtool ops.
> > By default RSS/RXHASH is disabled.
> > Virtio RSS "IPv6 extensions" hashes disabled.
> > Added ethtools ops to set key and indirection table.
> >
> > Signed-off-by: Andrew Melnychenko 
> > ---
> >  drivers/net/virtio_net.c | 191 +--
> >  1 file changed, 185 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 1404e683a2fd..495aed524e33 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -169,6 +169,24 @@ struct receive_queue {
> > struct xdp_rxq_info xdp_rxq;
> >  };
> >
> > +/* This structure can contain rss message with maximum settings for 
> > indirection table and keysize
> > + * Note, that default structure that describes RSS configuration 
> > virtio_net_rss_config
> > + * contains same info but can't handle table values.
> > + * In any case, structure would be passed to virtio hw through sg_buf 
> > split by parts
> > + * because table sizes may be differ according to the device configuration.
> > + */
> > +#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40
>
> Future proof, may want to support larger sizes.
>
> netdevice.h defines NETDEV_RSS_KEY_LEN at 52.
>
> tools/testing/selftests/net/toeplitz.c supports up to 60

According to virtio specification, the length of the key is
40bytes(and an indirection table is 128 entries max).
So for now, we support a maximum of the spec regardless of what the
kernel is capable of.

>
> > +#define VIRTIO_NET_RSS_MAX_TABLE_LEN128
> > +struct virtio_net_ctrl_rss {
> > +   u32 hash_types;
>
> conversely, u32 is a bit extreme?

No, the structure virtio_net_ctrl_rss is specified by the specification.

>
> > +   u16 indirection_table_mask;
> > +   u16 unclassified_queue;
> > +   u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN];
> > +   u16 max_tx_vq;
> > +   u8 hash_key_length;
> > +   u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE];
> > +};
> > +
> >  /* Control VQ buffers: protected by the rtnl lock */
> >  struct control_buf {
> > struct virtio_net_ctrl_hdr hdr;
> > @@ -178,6 +196,7 @@ struct control_buf {
> > u8 allmulti;
> > __virtio16 vid;
> > __virtio64 offloads;
> > +   struct virtio_net_ctrl_rss rss;
> >  };
> >
> >  struct virtnet_info {
> > @@ -206,6 +225,12 @@ struct virtnet_info {
> > /* Host will merge rx buffers for big packets (shake it! shake it!) 
> > */
> > bool mergeable_rx_bufs;
> >
> > +   /* Host supports rss and/or hash report */
> > +   bool has_rss;
> > +   u8 rss_key_size;
> > +   u16 rss_indir_table_size;
> > +   u32 rss_hash_types_supported;
> > +
> > /* Has control virtqueue */
> > bool has_cvq;
> >
> > @@ -2184,6 +2209,56 @@ static void virtnet_get_ringparam(struct net_device 
> > *dev,
> > ring->tx_pending = ring->tx_max_pending;
> >  }
> >
> > +static bool virtnet_commit_rss_command(struct virtnet_info *vi)
> > +{
> > +   struct net_device *dev = vi->dev;
> > +   struct scatterlist sgs[4];
> > +   unsigned int sg_buf_size;
> > +
> > +   /* prepare sgs */
> > +   sg_init_table(sgs, 4);
> > +
> > +   sg_buf_size = offsetof(struct virtio_net_ctrl_rss, 
> > indirection_table);
> > +   sg_set_buf([0], >ctrl->rss, sg_buf_size);
> > +
> > +   sg_buf_size = sizeof(uint16_t) * vi->rss_indir_table_size;
> > +   sg_set_buf([1], vi->ctrl->rss.indirection_table, sg_buf_size);
> > +
> > +   sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key)
> > +   - offsetof(struct virtio_net_ctrl_rss, max_tx_vq);
> > +   sg_set_buf([2], >ctrl->rss.max_tx_vq, sg_buf_size);
> > +
> > +   sg_buf_size = vi->rss_key_size;
> > +   sg_set_buf([3], vi->ctrl->rss.key, sg_buf_size);
> > +
> > +   if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
> > + VIRTIO_NET_CTRL_MQ_RSS_CONFIG, sgs)) {
> > +   dev_warn(>dev, "VIRTIONET issue with committing RSS 
> > sgs\n");
> > +   return false;
> > +   }
> > +   return true;
> > +}
> > +
> > +static void virtnet_init_default_rss(struct virtnet_info *vi)
> > +{
> > +   u32 indir_val = 0;
> > +   int i = 0;
> > +
> > +   vi->ctrl->rss.hash_types = vi->rss_hash_types_supported;
> > +   vi->ctrl->rss.indirection_table_mask = vi->rss_indir_table_size - 1;
>
> Is table size always a power of two?

Yes, it should be.

>
> > +   vi->ctrl->rss.unclassified_queue = 0;
> > +
> > +   for (; i < vi->rss_indir_table_size; ++i) {
> > +   indir_val = ethtool_rxfh_indir_default(i, 
> > vi->curr_queue_pairs);
> > +   vi->ctrl->rss.indirection_table[i] = indir_val;
> > +   }
> > +
> > +