[PATCH net-next 5/5] virtio_net: Implement DMA pre-handler

2023-05-25 Thread Liang Chen
Adding a DMA pre-handler that utilizes the page pool for managing DMA mappings.
When an IOMMU is enabled, turning on the page_pool_dma_map module parameter to
let the page pool manage DMA mappings gives a significant reduction in the
overhead caused by per-buffer DMA map/unmap operations.

In testing environments with a single-core VM and a QEMU-emulated IOMMU,
significant performance improvements can be observed:
  Upstream codebase: 1.76 Gbits/sec
  Upstream codebase with page pool fragmentation support: 1.81 Gbits/sec
  Upstream codebase with page pool fragmentation and DMA support: 19.3 Gbits/sec
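
For reference, these parameters can only be set at module load time (perms
are 0400), so enabling the new path would look something like the following
(page_pool_enabled comes from patch 2/5; parameter names are taken from the
diff below):

  modprobe virtio_net page_pool_enabled=1 page_pool_dma_map=1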

Signed-off-by: Liang Chen 
---
 drivers/net/virtio_net.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index ac40b8c66c59..73cc4f9fe4fa 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static int napi_weight = NAPI_POLL_WEIGHT;
 module_param(napi_weight, int, 0444);
@@ -33,8 +34,10 @@ module_param(napi_tx, bool, 0644);
 
 static bool page_pool_enabled;
 static bool page_pool_frag;
+static bool page_pool_dma_map;
 module_param(page_pool_enabled, bool, 0400);
 module_param(page_pool_frag, bool, 0400);
+module_param(page_pool_dma_map, bool, 0400);
 
 /* FIXME: MTU in config. */
 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
@@ -3830,6 +3833,49 @@ static void virtnet_del_vqs(struct virtnet_info *vi)
virtnet_free_queues(vi);
 }
 
+static dma_addr_t virtnet_pp_dma_map_page(struct device *dev, struct page *page,
+					   unsigned long offset, size_t size,
+					   enum dma_data_direction dir, unsigned long attrs)
+{
+   struct page *head_page;
+
+   if (dir != DMA_FROM_DEVICE)
+   return 0;
+
+   head_page = compound_head(page);
+   return page_pool_get_dma_addr(head_page)
+   + (page - head_page) * PAGE_SIZE
+   + offset;
+}
+
+static bool virtnet_pp_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
+				       size_t size, enum dma_data_direction dir,
+				       unsigned long attrs)
+{
+   phys_addr_t phys;
+
+	/* Handle only the RX direction, and sync the DMA memory only if it's
+	 * not a DMA coherent architecture.
+	 */
+   if (dir != DMA_FROM_DEVICE)
+   return false;
+
+   if (dev_is_dma_coherent(dev))
+   return true;
+
+   phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
+   if (WARN_ON(!phys))
+   return false;
+
+   arch_sync_dma_for_cpu(phys, size, dir);
+   return true;
+}
+
+static struct virtqueue_pre_dma_ops virtnet_pp_pre_dma_ops = {
+   .map_page = virtnet_pp_dma_map_page,
+   .unmap_page = virtnet_pp_dma_unmap_page,
+};
+
 static void virtnet_alloc_page_pool(struct receive_queue *rq)
 {
struct virtio_device *vdev = rq->vq->vdev;
@@ -3845,6 +3891,15 @@ static void virtnet_alloc_page_pool(struct receive_queue *rq)
if (page_pool_frag)
pp_params.flags |= PP_FLAG_PAGE_FRAG;
 
+   /* Consider using page pool DMA support only when DMA API is used. */
+   if (virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM) &&
+   page_pool_dma_map) {
+   pp_params.flags |= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
+   pp_params.dma_dir = DMA_FROM_DEVICE;
+   pp_params.max_len = PAGE_SIZE << pp_params.order;
+		virtqueue_register_pre_dma_ops(rq->vq, &virtnet_pp_pre_dma_ops);
+   }
+
	rq->page_pool = page_pool_create(&pp_params);
if (IS_ERR(rq->page_pool)) {
		dev_warn(&vdev->dev, "page pool creation failed: %ld\n",
-- 
2.31.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH net-next 4/5] virtio_ring: Introduce DMA pre-handler

2023-05-25 Thread Liang Chen
Currently, DMA operations on virtio devices' data buffers are encapsulated
within the underlying virtqueue implementation. DMA map/unmap operations are
performed for each data buffer attached to/detached from the virtqueue,
transparently and invisibly to the higher-level virtio device drivers. This
encapsulation makes it not viable for device drivers to introduce certain
mechanisms, such as page pool, that require explicit management of DMA
map/unmap. Therefore, by inserting a pre-handler before the generic DMA
map/unmap operations, virtio device drivers get the opportunity to
participate in DMA operations.
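
The include/linux/virtio.h side of the change is not visible in this
excerpt; judging from the call sites below and in patch 5/5, the new
interface presumably looks roughly like this (a reconstruction, not the
verbatim hunk):

  struct virtqueue_pre_dma_ops {
          dma_addr_t (*map_page)(struct device *dev, struct page *page,
                                 unsigned long offset, size_t size,
                                 enum dma_data_direction dir,
                                 unsigned long attrs);
          bool (*unmap_page)(struct device *dev, dma_addr_t dma_handle,
                             size_t size, enum dma_data_direction dir,
                             unsigned long attrs);
  };

  /* Attach a driver's pre-handler to a virtqueue. */
  void virtqueue_register_pre_dma_ops(struct virtqueue *vq,
                                      struct virtqueue_pre_dma_ops *pre_dma_ops);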

Signed-off-by: Liang Chen 
---
 drivers/virtio/virtio_ring.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 include/linux/virtio.h       | 18 ++++++++++++++++++
 2 files changed, 85 insertions(+), 6 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index c5310eaf8b46..a99641260555 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -213,6 +213,9 @@ struct vring_virtqueue {
bool last_add_time_valid;
ktime_t last_add_time;
 #endif
+
+	/* DMA mapping pre-handler for virtio device drivers */
+   struct virtqueue_pre_dma_ops *pre_dma_ops;
 };
 
 static struct virtqueue *__vring_new_virtqueue(unsigned int index,
@@ -369,6 +372,19 @@ static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
return (dma_addr_t)sg_phys(sg);
}
 
+	/* Allow virtio drivers to perform customized mapping operations, and
+	 * fall back to the generic path if it fails to handle the mapping.
+*/
+   if (vq->pre_dma_ops && vq->pre_dma_ops->map_page) {
+   dma_addr_t addr;
+
+   addr = vq->pre_dma_ops->map_page(vring_dma_dev(vq),
+   sg_page(sg), sg->offset, sg->length,
+   direction, 0);
+   if (addr)
+   return addr;
+   }
+
/*
 * We can't use dma_map_sg, because we don't use scatterlists in
 * the way it expects (we don't guarantee that the scatterlist
@@ -432,6 +448,15 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
 
flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
 
+   if (vq->pre_dma_ops && vq->pre_dma_ops->unmap_page) {
+   if (vq->pre_dma_ops->unmap_page(vring_dma_dev(vq),
+					virtio64_to_cpu(vq->vq.vdev, desc->addr),
+   virtio32_to_cpu(vq->vq.vdev, desc->len),
+   (flags & VRING_DESC_F_WRITE) ?
+   DMA_FROM_DEVICE : DMA_TO_DEVICE, 0))
+   return;
+   }
+
dma_unmap_page(vring_dma_dev(vq),
   virtio64_to_cpu(vq->vq.vdev, desc->addr),
   virtio32_to_cpu(vq->vq.vdev, desc->len),
@@ -456,14 +481,22 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
 extra[i].len,
 (flags & VRING_DESC_F_WRITE) ?
 DMA_FROM_DEVICE : DMA_TO_DEVICE);
-   } else {
-   dma_unmap_page(vring_dma_dev(vq),
-  extra[i].addr,
-  extra[i].len,
-  (flags & VRING_DESC_F_WRITE) ?
-  DMA_FROM_DEVICE : DMA_TO_DEVICE);
+   goto out;
+   } else if (vq->pre_dma_ops && vq->pre_dma_ops->unmap_page) {
+   if (vq->pre_dma_ops->unmap_page(vring_dma_dev(vq),
+   extra[i].addr,
+   extra[i].len,
+   (flags & VRING_DESC_F_WRITE) ?
+   DMA_FROM_DEVICE : DMA_TO_DEVICE, 0))
+   goto out;
}
 
+   dma_unmap_page(vring_dma_dev(vq),
+   extra[i].addr,
+   extra[i].len,
+   (flags & VRING_DESC_F_WRITE) ?
+   DMA_FROM_DEVICE : DMA_TO_DEVICE);
+
 out:
return extra[i].next;
 }
@@ -1206,10 +1239,19 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
 (flags & VRING_DESC_F_WRITE) ?
 DMA_FROM_DEVICE : DMA_TO_DEVICE);
} else {
+   if (vq->pre_dma_ops && vq->pre_dma_ops->unmap_page) {
+   if (vq->pre_dma_ops->unmap_page(vring_dma_dev(vq),
+   extra->addr,
+   extra->len,
+   (flags & VRING_DESC_F_WRITE) ?
+						DMA_FROM_DEVICE : DMA_TO_DEVICE, 0))
+   return;
+   }
  

[PATCH net-next 3/5] virtio_net: Add page pool fragmentation support

2023-05-25 Thread Liang Chen
To further enhance performance, implement page pool fragmentation
support and introduce a module parameter to enable or disable it.

In single-core VM testing environments, there is an additional performance
gain observed in the normal path compared to the one-packet-per-page
approach.
  Upstream codebase: 47.5 Gbits/sec
  Upstream codebase with page pool: 50.2 Gbits/sec
  Upstream codebase with page pool fragmentation support: 52.3 Gbits/sec

There is also some performance gain for XDP cpumap.
  Upstream codebase: 1.38 Gbits/sec
  Upstream codebase with page pool: 9.74 Gbits/sec
  Upstream codebase with page pool fragmentation: 10.3 Gbits/sec
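
The allocation pattern this patch repeats at each call site (a PAGE_SIZE
fragment when PP_FLAG_PAGE_FRAG is set, a whole pool page otherwise) can be
summarized by a helper along these lines (hypothetical, not part of the
patch):

  static struct page *virtnet_pp_alloc(struct receive_queue *rq,
                                       unsigned int *offset)
  {
          *offset = 0;
          if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG)
                  /* Hand out a PAGE_SIZE slice of a (possibly compound) pool page. */
                  return page_pool_dev_alloc_frag(rq->page_pool, offset, PAGE_SIZE);
          /* One full page per packet, as in the previous patch. */
          return page_pool_dev_alloc_pages(rq->page_pool);
  }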

Signed-off-by: Liang Chen 
---
 drivers/net/virtio_net.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 55 insertions(+), 17 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 99c0ca0c1781..ac40b8c66c59 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -32,7 +32,9 @@ module_param(gso, bool, 0444);
 module_param(napi_tx, bool, 0644);
 
 static bool page_pool_enabled;
+static bool page_pool_frag;
 module_param(page_pool_enabled, bool, 0400);
+module_param(page_pool_frag, bool, 0400);
 
 /* FIXME: MTU in config. */
 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
@@ -909,23 +911,32 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
   struct page *p,
   int offset,
   int page_off,
-  unsigned int *len)
+  unsigned int *len,
+  unsigned int *pp_frag_offset)
 {
int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
struct page *page;
+   unsigned int pp_frag_offset_val;
 
if (page_off + *len + tailroom > PAGE_SIZE)
return NULL;
 
if (rq->page_pool)
-   page = page_pool_dev_alloc_pages(rq->page_pool);
+   if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG)
+			page = page_pool_dev_alloc_frag(rq->page_pool, pp_frag_offset,
+							PAGE_SIZE);
+   else
+   page = page_pool_dev_alloc_pages(rq->page_pool);
else
page = alloc_page(GFP_ATOMIC);
 
if (!page)
return NULL;
 
-   memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
+   pp_frag_offset_val = pp_frag_offset ? *pp_frag_offset : 0;
+
+   memcpy(page_address(page) + page_off + pp_frag_offset_val,
+  page_address(p) + offset, *len);
page_off += *len;
 
while (--*num_buf) {
@@ -948,7 +959,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
goto err_buf;
}
 
-   memcpy(page_address(page) + page_off,
+   memcpy(page_address(page) + page_off + pp_frag_offset_val,
   page_address(p) + off, buflen);
page_off += buflen;
virtnet_put_page(rq, p);
@@ -1029,7 +1040,7 @@ static struct sk_buff *receive_small_xdp(struct net_device *dev,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
		xdp_page = xdp_linearize_page(rq, &num_buf, page,
					      offset, header_offset,
-					      &len);
+					      &len, NULL);
if (!xdp_page)
goto err_xdp;
 
@@ -1323,6 +1334,7 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
struct page *xdp_page;
unsigned int xdp_room;
+   unsigned int page_frag_offset = 0;
 
/* Transient failure which in theory could occur if
 * in-flight packets from before XDP was enabled reach
@@ -1356,7 +1368,8 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
xdp_page = xdp_linearize_page(rq, num_buf,
  *page, offset,
  VIRTIO_XDP_HEADROOM,
-					      len);
+					      len,
+					      &page_frag_offset);
if (!xdp_page)
return NULL;
} else {
@@ -1366,14 +1379,19 @@ static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
return NULL;
 
if (rq->page_pool)
-   xdp_page = page_pool_dev_alloc_pages(rq->page_pool);
+   if (rq->page_pool->p.flags & PP_FLAG_PAGE_FRAG)
+			xdp_page = page_pool_dev_alloc_frag(rq->page_pool,
+

[PATCH net-next 2/5] virtio_net: Add page_pool support to improve performance

2023-05-25 Thread Liang Chen
The implementation at the moment uses one page per packet in both the
normal and XDP path. In addition, a module parameter is introduced to enable
or disable the usage of page pool (disabled by default).

In single-core VM testing environments, it gives a modest performance gain
in the normal path.
  Upstream codebase: 47.5 Gbits/sec
  Upstream codebase + page_pool support: 50.2 Gbits/sec

In multi-core VM testing environments, the most significant performance
gain is observed in XDP cpumap:
  Upstream codebase: 1.38 Gbits/sec
  Upstream codebase + page_pool support: 9.74 Gbits/sec

With this foundation, we can further integrate page pool fragmentation and
DMA map/unmap support.
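
For readers new to the page pool API, setting up a per-receive-queue pool
follows this general shape (a minimal sketch with illustrative values, not
the exact parameters used by the patch):

  struct page_pool_params pp_params = {
          .order     = 0,                          /* one page per entry */
          .pool_size = 256,                        /* illustrative, e.g. rx ring size */
          .nid       = dev_to_node(vdev->dev.parent),
          .dev       = vdev->dev.parent,
          .offset    = 0,
  };

  rq->page_pool = page_pool_create(&pp_params);
  if (IS_ERR(rq->page_pool))
          rq->page_pool = NULL;   /* fall back to plain page allocation */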

Signed-off-by: Liang Chen 
---
 drivers/net/virtio_net.c | 188 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 146 insertions(+), 42 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c5dca0d92e64..99c0ca0c1781 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -31,6 +31,9 @@ module_param(csum, bool, 0444);
 module_param(gso, bool, 0444);
 module_param(napi_tx, bool, 0644);
 
+static bool page_pool_enabled;
+module_param(page_pool_enabled, bool, 0400);
+
 /* FIXME: MTU in config. */
 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
 #define GOOD_COPY_LEN  128
@@ -159,6 +162,9 @@ struct receive_queue {
/* Chain pages by the private ptr. */
struct page *pages;
 
+   /* Page pool */
+   struct page_pool *page_pool;
+
/* Average packet length for mergeable receive buffers. */
struct ewma_pkt_len mrg_avg_pkt_len;
 
@@ -459,6 +465,14 @@ static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
return skb;
 }
 
+static void virtnet_put_page(struct receive_queue *rq, struct page *page)
+{
+   if (rq->page_pool)
+   page_pool_put_full_page(rq->page_pool, page, true);
+   else
+   put_page(page);
+}
+
 /* Called from bottom half context */
 static struct sk_buff *page_to_skb(struct virtnet_info *vi,
   struct receive_queue *rq,
@@ -555,7 +569,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
hdr = skb_vnet_hdr(skb);
memcpy(hdr, hdr_p, hdr_len);
if (page_to_free)
-   put_page(page_to_free);
+   virtnet_put_page(rq, page_to_free);
 
return skb;
 }
@@ -802,7 +816,7 @@ static int virtnet_xdp_xmit(struct net_device *dev,
return ret;
 }
 
-static void put_xdp_frags(struct xdp_buff *xdp)
+static void put_xdp_frags(struct xdp_buff *xdp, struct receive_queue *rq)
 {
struct skb_shared_info *shinfo;
struct page *xdp_page;
@@ -812,7 +826,7 @@ static void put_xdp_frags(struct xdp_buff *xdp)
shinfo = xdp_get_shared_info_from_buff(xdp);
for (i = 0; i < shinfo->nr_frags; i++) {
		xdp_page = skb_frag_page(&shinfo->frags[i]);
-   put_page(xdp_page);
+   virtnet_put_page(rq, xdp_page);
}
}
 }
@@ -903,7 +917,11 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
if (page_off + *len + tailroom > PAGE_SIZE)
return NULL;
 
-   page = alloc_page(GFP_ATOMIC);
+   if (rq->page_pool)
+   page = page_pool_dev_alloc_pages(rq->page_pool);
+   else
+   page = alloc_page(GFP_ATOMIC);
+
if (!page)
return NULL;
 
@@ -926,21 +944,24 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 * is sending packet larger than the MTU.
 */
if ((page_off + buflen + tailroom) > PAGE_SIZE) {
-   put_page(p);
+   virtnet_put_page(rq, p);
goto err_buf;
}
 
memcpy(page_address(page) + page_off,
   page_address(p) + off, buflen);
page_off += buflen;
-   put_page(p);
+   virtnet_put_page(rq, p);
}
 
/* Headroom does not contribute to packet length */
*len = page_off - VIRTIO_XDP_HEADROOM;
return page;
 err_buf:
-   __free_pages(page, 0);
+   if (rq->page_pool)
+   page_pool_put_full_page(rq->page_pool, page, true);
+   else
+   __free_pages(page, 0);
return NULL;
 }
 
@@ -1144,7 +1165,7 @@ static void mergeable_buf_free(struct receive_queue *rq, int num_buf,
}
stats->bytes += len;
page = virt_to_head_page(buf);
-   put_page(page);
+   virtnet_put_page(rq, page);
}
 }
 
@@ -1264,7 +1285,7 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
cur_frag_size = truesize;
xdp_frags_truesz += cur_frag_size;
if (unlikely(len > truesize - room || cur_frag_size > 

[PATCH net-next 1/5] virtio_net: Fix an unsafe reference to the page chain

2023-05-25 Thread Liang Chen
"private" of buffer page is currently used for big mode to chain pages.
But in mergeable mode, that offset of page could mean something else,
e.g. when page_pool page is used instead. So excluding mergeable mode to
avoid such a problem.
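
For context, the big-mode chaining that this fix guards works roughly as
follows (paraphrased from give_pages() in the same file). A page_pool page
reuses these same struct page fields for its own bookkeeping, so following
->private as a chain pointer in mergeable mode would dereference garbage:

  static void give_pages(struct receive_queue *rq, struct page *page)
  {
          struct page *end;

          /* Find end of list, sew whole thing into rq->pages. */
          for (end = page; end->private; end = (struct page *)end->private);
          end->private = (unsigned long)rq->pages;
          rq->pages = page;
  }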

Signed-off-by: Liang Chen 
---
 drivers/net/virtio_net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 5a7f7a76b920..c5dca0d92e64 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -497,7 +497,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
return NULL;
 
page = (struct page *)page->private;
-   if (page)
+   if (!vi->mergeable_rx_bufs && page)
give_pages(rq, page);
goto ok;
}
-- 
2.31.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH V2 4/5] vDPA/ifcvf: synchronize irqs in the reset routine

2023-05-25 Thread Zhu, Lingshan



On 5/26/2023 11:36 AM, Zhu, Lingshan wrote:



On 5/26/2023 9:34 AM, Jason Wang wrote:
On Thu, May 25, 2023 at 5:38 PM Zhu, Lingshan  wrote:



On 5/24/2023 4:03 PM, Jason Wang wrote:
On Mon, May 8, 2023 at 6:05 PM Zhu Lingshan  wrote:

This commit synchronizes irqs of the virtqueues
and config space in the reset routine.
Thus ifcvf_stop_hw() and reset() are refactored as well.

Signed-off-by: Zhu Lingshan 
---
   drivers/vdpa/ifcvf/ifcvf_base.c | 41 +
   drivers/vdpa/ifcvf/ifcvf_base.h |  1 +
   drivers/vdpa/ifcvf/ifcvf_main.c | 46 +
   3 files changed, 38 insertions(+), 50 deletions(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c
index 79e313c5e10e..1f39290baa38 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.c
+++ b/drivers/vdpa/ifcvf/ifcvf_base.c
@@ -170,12 +170,9 @@ void ifcvf_set_status(struct ifcvf_hw *hw, u8 status)


   void ifcvf_reset(struct ifcvf_hw *hw)
   {
-   hw->config_cb.callback = NULL;
-   hw->config_cb.private = NULL;
-
  ifcvf_set_status(hw, 0);
-   /* flush set_status, make sure VF is stopped, reset */
-   ifcvf_get_status(hw);
+   while (ifcvf_get_status(hw))
+   msleep(1);
   }

   u64 ifcvf_get_hw_features(struct ifcvf_hw *hw)
@@ -368,20 +365,42 @@ void ifcvf_set_vq_ready(struct ifcvf_hw *hw, u16 qid, bool ready)

  vp_iowrite16(ready, &cfg->queue_enable);
   }

-static void ifcvf_hw_disable(struct ifcvf_hw *hw)
+static void ifcvf_reset_vring(struct ifcvf_hw *hw)
   {
-   u32 i;
+   u16 qid;
+
+   for (qid = 0; qid < hw->nr_vring; qid++) {
+   hw->vring[qid].cb.callback = NULL;
+   hw->vring[qid].cb.private = NULL;
+   ifcvf_set_vq_vector(hw, qid, VIRTIO_MSI_NO_VECTOR);
+   }
+}

+static void ifcvf_reset_config_handler(struct ifcvf_hw *hw)
+{
+   hw->config_cb.callback = NULL;
+   hw->config_cb.private = NULL;
  ifcvf_set_config_vector(hw, VIRTIO_MSI_NO_VECTOR);
-   for (i = 0; i < hw->nr_vring; i++) {
-   ifcvf_set_vq_vector(hw, i, VIRTIO_MSI_NO_VECTOR);
+}
+
+static void ifcvf_synchronize_irq(struct ifcvf_hw *hw)
+{
+   u32 nvectors = hw->num_msix_vectors;
+   struct pci_dev *pdev = hw->pdev;
+   int i, irq;
+
+   for (i = 0; i < nvectors; i++) {
+   irq = pci_irq_vector(pdev, i);
+   if (irq >= 0)
+   synchronize_irq(irq);
  }
   }

   void ifcvf_stop_hw(struct ifcvf_hw *hw)
   {
-   ifcvf_hw_disable(hw);
-   ifcvf_reset(hw);
+   ifcvf_synchronize_irq(hw);
+   ifcvf_reset_vring(hw);
+   ifcvf_reset_config_handler(hw);

Nit:

So the name of this function is kind of misleading since irq
synchronization and the virtqueue/config handlers do not belong to
hardware?

Maybe it would be better to call it ifcvf_stop().

Sure, I will send a V3 with this renaming,
do you ack patch 1/5?

Yes, I think I've acked to that patch.

I will send a V3 with this renaming and your ack for patch 1/5

By the way, do you ack this one after this function renaming?
If so, I will also include your ack in V3, so we don't need another
review process, I will ping Michael for a merge.


Thanks


Thanks
Zhu Lingshan

Thanks


   }

   void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid)
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
index d34d3bc0dbf4..7430f80779be 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.h
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -82,6 +82,7 @@ struct ifcvf_hw {
  int vqs_reused_irq;
  u16 nr_vring;
  /* VIRTIO_PCI_CAP_DEVICE_CFG size */
+   u32 num_msix_vectors;
  u32 cap_dev_config_size;
  struct pci_dev *pdev;
   };
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 968687159e44..3401b9901dd2 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -125,6 +125,7 @@ static void ifcvf_free_irq(struct ifcvf_hw *vf)
  ifcvf_free_vq_irq(vf);
  ifcvf_free_config_irq(vf);
  ifcvf_free_irq_vectors(pdev);
+   vf->num_msix_vectors = 0;
   }

   /* ifcvf MSIX vectors allocator, this helper tries to allocate
@@ -343,36 +344,11 @@ static int ifcvf_request_irq(struct ifcvf_hw *vf)

  if (ret)
  return ret;

-   return 0;
-}
-
-static int ifcvf_stop_datapath(struct ifcvf_adapter *adapter)
-{
-   struct ifcvf_hw *vf = adapter->vf;
-   int i;
-
-   for (i = 0; i < vf->nr_vring; i++)
-   vf->vring[i].cb.callback = NULL;
-
-   ifcvf_stop_hw(vf);
+   vf->num_msix_vectors = nvectors;

  return 0;
   }

-static void ifcvf_reset_vring(struct ifcvf_adapter *adapter)
-{
-   struct ifcvf_hw *vf = adapter->vf;
-   int i;
-
-   for (i = 0; i < vf->nr_vring; i++) {
-   vf->vring[i].last_avail_idx = 0;
-   vf->vring[i].cb.callback = NULL;

Re: [PATCH V2 4/5] vDPA/ifcvf: synchronize irqs in the reset routine

2023-05-25 Thread Zhu, Lingshan



On 5/26/2023 9:34 AM, Jason Wang wrote:

On Thu, May 25, 2023 at 5:38 PM Zhu, Lingshan  wrote:



On 5/24/2023 4:03 PM, Jason Wang wrote:

On Mon, May 8, 2023 at 6:05 PM Zhu Lingshan  wrote:

This commit synchronizes irqs of the virtqueues
and config space in the reset routine.
Thus ifcvf_stop_hw() and reset() are refactored as well.

Signed-off-by: Zhu Lingshan 
---
   drivers/vdpa/ifcvf/ifcvf_base.c | 41 +
   drivers/vdpa/ifcvf/ifcvf_base.h |  1 +
   drivers/vdpa/ifcvf/ifcvf_main.c | 46 +
   3 files changed, 38 insertions(+), 50 deletions(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c
index 79e313c5e10e..1f39290baa38 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.c
+++ b/drivers/vdpa/ifcvf/ifcvf_base.c
@@ -170,12 +170,9 @@ void ifcvf_set_status(struct ifcvf_hw *hw, u8 status)

   void ifcvf_reset(struct ifcvf_hw *hw)
   {
-   hw->config_cb.callback = NULL;
-   hw->config_cb.private = NULL;
-
  ifcvf_set_status(hw, 0);
-   /* flush set_status, make sure VF is stopped, reset */
-   ifcvf_get_status(hw);
+   while (ifcvf_get_status(hw))
+   msleep(1);
   }

   u64 ifcvf_get_hw_features(struct ifcvf_hw *hw)
@@ -368,20 +365,42 @@ void ifcvf_set_vq_ready(struct ifcvf_hw *hw, u16 qid, bool ready)
  vp_iowrite16(ready, &cfg->queue_enable);
   }

-static void ifcvf_hw_disable(struct ifcvf_hw *hw)
+static void ifcvf_reset_vring(struct ifcvf_hw *hw)
   {
-   u32 i;
+   u16 qid;
+
+   for (qid = 0; qid < hw->nr_vring; qid++) {
+   hw->vring[qid].cb.callback = NULL;
+   hw->vring[qid].cb.private = NULL;
+   ifcvf_set_vq_vector(hw, qid, VIRTIO_MSI_NO_VECTOR);
+   }
+}

+static void ifcvf_reset_config_handler(struct ifcvf_hw *hw)
+{
+   hw->config_cb.callback = NULL;
+   hw->config_cb.private = NULL;
  ifcvf_set_config_vector(hw, VIRTIO_MSI_NO_VECTOR);
-   for (i = 0; i < hw->nr_vring; i++) {
-   ifcvf_set_vq_vector(hw, i, VIRTIO_MSI_NO_VECTOR);
+}
+
+static void ifcvf_synchronize_irq(struct ifcvf_hw *hw)
+{
+   u32 nvectors = hw->num_msix_vectors;
+   struct pci_dev *pdev = hw->pdev;
+   int i, irq;
+
+   for (i = 0; i < nvectors; i++) {
+   irq = pci_irq_vector(pdev, i);
+   if (irq >= 0)
+   synchronize_irq(irq);
  }
   }

   void ifcvf_stop_hw(struct ifcvf_hw *hw)
   {
-   ifcvf_hw_disable(hw);
-   ifcvf_reset(hw);
+   ifcvf_synchronize_irq(hw);
+   ifcvf_reset_vring(hw);
+   ifcvf_reset_config_handler(hw);

Nit:

So the name of this function is kind of misleading since irq
synchronization and the virtqueue/config handlers do not belong to
hardware?

Maybe it would be better to call it ifcvf_stop().

Sure, I will send a V3 with this renaming,
do you ack patch 1/5?

Yes, I think I've acked to that patch.

I will send a V3 with this renaming and your ack for patch 1/5


Thanks


Thanks
Zhu Lingshan

Thanks


   }

   void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid)
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
index d34d3bc0dbf4..7430f80779be 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.h
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -82,6 +82,7 @@ struct ifcvf_hw {
  int vqs_reused_irq;
  u16 nr_vring;
  /* VIRTIO_PCI_CAP_DEVICE_CFG size */
+   u32 num_msix_vectors;
  u32 cap_dev_config_size;
  struct pci_dev *pdev;
   };
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 968687159e44..3401b9901dd2 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -125,6 +125,7 @@ static void ifcvf_free_irq(struct ifcvf_hw *vf)
  ifcvf_free_vq_irq(vf);
  ifcvf_free_config_irq(vf);
  ifcvf_free_irq_vectors(pdev);
+   vf->num_msix_vectors = 0;
   }

   /* ifcvf MSIX vectors allocator, this helper tries to allocate
@@ -343,36 +344,11 @@ static int ifcvf_request_irq(struct ifcvf_hw *vf)
  if (ret)
  return ret;

-   return 0;
-}
-
-static int ifcvf_stop_datapath(struct ifcvf_adapter *adapter)
-{
-   struct ifcvf_hw *vf = adapter->vf;
-   int i;
-
-   for (i = 0; i < vf->nr_vring; i++)
-   vf->vring[i].cb.callback = NULL;
-
-   ifcvf_stop_hw(vf);
+   vf->num_msix_vectors = nvectors;

  return 0;
   }

-static void ifcvf_reset_vring(struct ifcvf_adapter *adapter)
-{
-   struct ifcvf_hw *vf = adapter->vf;
-   int i;
-
-   for (i = 0; i < vf->nr_vring; i++) {
-   vf->vring[i].last_avail_idx = 0;
-   vf->vring[i].cb.callback = NULL;
-   vf->vring[i].cb.private = NULL;
-   }
-
-   ifcvf_reset(vf);
-}
-
   static struct ifcvf_adapter *vdpa_to_adapter(struct vdpa_device *vdpa_dev)
   {
  return container_of(vdpa_dev, struct ifcvf_adapter, 

Re: [PATCH V2 4/5] vDPA/ifcvf: synchronize irqs in the reset routine

2023-05-25 Thread Jason Wang
On Thu, May 25, 2023 at 5:38 PM Zhu, Lingshan  wrote:
>
>
>
> On 5/24/2023 4:03 PM, Jason Wang wrote:
> > On Mon, May 8, 2023 at 6:05 PM Zhu Lingshan  wrote:
> >> This commit synchronizes irqs of the virtqueues
> >> and config space in the reset routine.
> >> Thus ifcvf_stop_hw() and reset() are refactored as well.
> >>
> >> Signed-off-by: Zhu Lingshan 
> >> ---
> >>   drivers/vdpa/ifcvf/ifcvf_base.c | 41 +
> >>   drivers/vdpa/ifcvf/ifcvf_base.h |  1 +
> >>   drivers/vdpa/ifcvf/ifcvf_main.c | 46 +
> >>   3 files changed, 38 insertions(+), 50 deletions(-)
> >>
> >> diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c
> >> index 79e313c5e10e..1f39290baa38 100644
> >> --- a/drivers/vdpa/ifcvf/ifcvf_base.c
> >> +++ b/drivers/vdpa/ifcvf/ifcvf_base.c
> >> @@ -170,12 +170,9 @@ void ifcvf_set_status(struct ifcvf_hw *hw, u8 status)
> >>
> >>   void ifcvf_reset(struct ifcvf_hw *hw)
> >>   {
> >> -   hw->config_cb.callback = NULL;
> >> -   hw->config_cb.private = NULL;
> >> -
> >>  ifcvf_set_status(hw, 0);
> >> -   /* flush set_status, make sure VF is stopped, reset */
> >> -   ifcvf_get_status(hw);
> >> +   while (ifcvf_get_status(hw))
> >> +   msleep(1);
> >>   }
> >>
> >>   u64 ifcvf_get_hw_features(struct ifcvf_hw *hw)
> >> @@ -368,20 +365,42 @@ void ifcvf_set_vq_ready(struct ifcvf_hw *hw, u16 qid, bool ready)
> >>  vp_iowrite16(ready, &cfg->queue_enable);
> >>   }
> >>
> >> -static void ifcvf_hw_disable(struct ifcvf_hw *hw)
> >> +static void ifcvf_reset_vring(struct ifcvf_hw *hw)
> >>   {
> >> -   u32 i;
> >> +   u16 qid;
> >> +
> >> +   for (qid = 0; qid < hw->nr_vring; qid++) {
> >> +   hw->vring[qid].cb.callback = NULL;
> >> +   hw->vring[qid].cb.private = NULL;
> >> +   ifcvf_set_vq_vector(hw, qid, VIRTIO_MSI_NO_VECTOR);
> >> +   }
> >> +}
> >>
> >> +static void ifcvf_reset_config_handler(struct ifcvf_hw *hw)
> >> +{
> >> +   hw->config_cb.callback = NULL;
> >> +   hw->config_cb.private = NULL;
> >>  ifcvf_set_config_vector(hw, VIRTIO_MSI_NO_VECTOR);
> >> -   for (i = 0; i < hw->nr_vring; i++) {
> >> -   ifcvf_set_vq_vector(hw, i, VIRTIO_MSI_NO_VECTOR);
> >> +}
> >> +
> >> +static void ifcvf_synchronize_irq(struct ifcvf_hw *hw)
> >> +{
> >> +   u32 nvectors = hw->num_msix_vectors;
> >> +   struct pci_dev *pdev = hw->pdev;
> >> +   int i, irq;
> >> +
> >> +   for (i = 0; i < nvectors; i++) {
> >> +   irq = pci_irq_vector(pdev, i);
> >> +   if (irq >= 0)
> >> +   synchronize_irq(irq);
> >>  }
> >>   }
> >>
> >>   void ifcvf_stop_hw(struct ifcvf_hw *hw)
> >>   {
> >> -   ifcvf_hw_disable(hw);
> >> -   ifcvf_reset(hw);
> >> +   ifcvf_synchronize_irq(hw);
> >> +   ifcvf_reset_vring(hw);
> >> +   ifcvf_reset_config_handler(hw);
> > Nit:
> >
> > So the name of this function is kind of misleading since irq
> > synchronization and the virtqueue/config handlers do not belong to
> > hardware?
> >
> > Maybe it would be better to call it ifcvf_stop().
> Sure, I will send a V3 with this renaming,
> do you ack patch 1/5?

Yes, I think I've acked to that patch.

Thanks

>
> Thanks
> Zhu Lingshan
> >
> > Thanks
> >
> >>   }
> >>
> >>   void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid)
> >> diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
> >> index d34d3bc0dbf4..7430f80779be 100644
> >> --- a/drivers/vdpa/ifcvf/ifcvf_base.h
> >> +++ b/drivers/vdpa/ifcvf/ifcvf_base.h
> >> @@ -82,6 +82,7 @@ struct ifcvf_hw {
> >>  int vqs_reused_irq;
> >>  u16 nr_vring;
> >>  /* VIRTIO_PCI_CAP_DEVICE_CFG size */
> >> +   u32 num_msix_vectors;
> >>  u32 cap_dev_config_size;
> >>  struct pci_dev *pdev;
> >>   };
> >> diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
> >> index 968687159e44..3401b9901dd2 100644
> >> --- a/drivers/vdpa/ifcvf/ifcvf_main.c
> >> +++ b/drivers/vdpa/ifcvf/ifcvf_main.c
> >> @@ -125,6 +125,7 @@ static void ifcvf_free_irq(struct ifcvf_hw *vf)
> >>  ifcvf_free_vq_irq(vf);
> >>  ifcvf_free_config_irq(vf);
> >>  ifcvf_free_irq_vectors(pdev);
> >> +   vf->num_msix_vectors = 0;
> >>   }
> >>
> >>   /* ifcvf MSIX vectors allocator, this helper tries to allocate
> >> @@ -343,36 +344,11 @@ static int ifcvf_request_irq(struct ifcvf_hw *vf)
> >>  if (ret)
> >>  return ret;
> >>
> >> -   return 0;
> >> -}
> >> -
> >> -static int ifcvf_stop_datapath(struct ifcvf_adapter *adapter)
> >> -{
> >> -   struct ifcvf_hw *vf = adapter->vf;
> >> -   int i;
> >> -
> >> -   for (i = 0; i < vf->nr_vring; i++)
> >> -   vf->vring[i].cb.callback = NULL;
> >> -
> >> -   ifcvf_stop_hw(vf);
> >> +   vf->num_msix_vectors = nvectors;
> >>
> >>  

Re: [PATCH V3 net-next 1/2] virtio-net: convert rx mode setting to use workqueue

2023-05-25 Thread Jason Wang
On Thu, May 25, 2023 at 3:41 PM Michael S. Tsirkin  wrote:
>
> On Thu, May 25, 2023 at 11:43:34AM +0800, Jason Wang wrote:
> > On Wed, May 24, 2023 at 5:15 PM Michael S. Tsirkin  wrote:
> > >
> > > On Wed, May 24, 2023 at 04:18:41PM +0800, Jason Wang wrote:
> > > > This patch converts rx mode setting to be done in a workqueue; this is
> > > > a must to allow sleeping while waiting for the cvq command to
> > > > respond, since the current code is executed under the addr spin lock.
> > > >
> > > > Signed-off-by: Jason Wang 
> > > > ---
> > > > Changes since V1:
> > > > - use RTNL to synchronize rx mode worker
> > > > ---
> > > >  drivers/net/virtio_net.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
> > > >  1 file changed, 52 insertions(+), 3 deletions(-)
> > > >
> > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > index 56ca1d270304..5d2f1da4eaa0 100644
> > > > --- a/drivers/net/virtio_net.c
> > > > +++ b/drivers/net/virtio_net.c
> > > > @@ -265,6 +265,12 @@ struct virtnet_info {
> > > >   /* Work struct for config space updates */
> > > >   struct work_struct config_work;
> > > >
> > > > + /* Work struct for config rx mode */
> > >
> > > With a bit less abbreviation maybe? setting rx mode?
> >
> > That's fine.
> >
> > >
> > > > + struct work_struct rx_mode_work;
> > > > +
> > > > + /* Is rx mode work enabled? */
> > >
> > > Ugh not a great comment.
> >
> > Any suggestions for this. E.g we had:
> >
> > /* Is delayed refill enabled? */
>
> /* OK to queue work setting RX mode? */

Ok.

>
>
> > >
> > > > + bool rx_mode_work_enabled;
> > > > +
> > >
> > >
> > >
> > > >   /* Does the affinity hint is set for virtqueues? */
> > > >   bool affinity_hint_set;
> > > >
> > > > @@ -388,6 +394,20 @@ static void disable_delayed_refill(struct virtnet_info *vi)
> > > >   spin_unlock_bh(&vi->refill_lock);
> > > >  }
> > > >
> > > > +static void enable_rx_mode_work(struct virtnet_info *vi)
> > > > +{
> > > > + rtnl_lock();
> > > > + vi->rx_mode_work_enabled = true;
> > > > + rtnl_unlock();
> > > > +}
> > > > +
> > > > +static void disable_rx_mode_work(struct virtnet_info *vi)
> > > > +{
> > > > + rtnl_lock();
> > > > + vi->rx_mode_work_enabled = false;
> > > > + rtnl_unlock();
> > > > +}
> > > > +
> > > >  static void virtqueue_napi_schedule(struct napi_struct *napi,
> > > >   struct virtqueue *vq)
> > > >  {
> > > > @@ -2341,9 +2361,11 @@ static int virtnet_close(struct net_device *dev)
> > > >   return 0;
> > > >  }
> > > >
> > > > -static void virtnet_set_rx_mode(struct net_device *dev)
> > > > +static void virtnet_rx_mode_work(struct work_struct *work)
> > > >  {
> > > > - struct virtnet_info *vi = netdev_priv(dev);
> > > > + struct virtnet_info *vi =
> > > > + container_of(work, struct virtnet_info, rx_mode_work);
> > > > + struct net_device *dev = vi->dev;
> > > >   struct scatterlist sg[2];
> > > >   struct virtio_net_ctrl_mac *mac_data;
> > > >   struct netdev_hw_addr *ha;
> > > > @@ -2356,6 +2378,8 @@ static void virtnet_set_rx_mode(struct net_device *dev)
> > > >   if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
> > > >   return;
> > > >
> > > > + rtnl_lock();
> > > > +
> > > >   vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0);
> > > >   vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
> > > >
> > > > @@ -2373,14 +2397,19 @@ static void virtnet_set_rx_mode(struct net_device *dev)
> > > >   dev_warn(>dev, "Failed to %sable allmulti mode.\n",
> > > >vi->ctrl->allmulti ? "en" : "dis");
> > > >
> > > > + netif_addr_lock_bh(dev);
> > > > +
> > > >   uc_count = netdev_uc_count(dev);
> > > >   mc_count = netdev_mc_count(dev);
> > > >   /* MAC filter - use one buffer for both lists */
> > > >   buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
> > > > (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
> > > >   mac_data = buf;
> > > > - if (!buf)
> > > > + if (!buf) {
> > > > + netif_addr_unlock_bh(dev);
> > > > + rtnl_unlock();
> > > >   return;
> > > > + }
> > > >
> > > >   sg_init_table(sg, 2);
> > > >
> > > > @@ -2401,6 +2430,8 @@ static void virtnet_set_rx_mode(struct net_device *dev)
> > > >   netdev_for_each_mc_addr(ha, dev)
> > > >   memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
> > > >
> > > > + netif_addr_unlock_bh(dev);
> > > > +
> > > >   sg_set_buf(&sg[1], mac_data,
> > > >  sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
> > > >
> > > > @@ -2408,9 +2439,19 @@ static void virtnet_set_rx_mode(struct net_device *dev)
> > > > VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
> > > >   dev_warn(>dev, "Failed to set MAC filter table.\n");
> > > >
> > > > + 
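
For reference, the ndo_set_rx_mode callback itself presumably reduces to
scheduling the worker, along these lines (a sketch based on the approach
described above, not the verbatim tail of the diff):

  static void virtnet_set_rx_mode(struct net_device *dev)
  {
          struct virtnet_info *vi = netdev_priv(dev);

          if (vi->rx_mode_work_enabled)
                  schedule_work(&vi->rx_mode_work);
  }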

Re: [PATCH 3/3] fork, vhost: Use CLONE_THREAD to fix freezer/ps regression

2023-05-25 Thread Linus Torvalds
On Thu, May 25, 2023 at 8:30 AM Eric W. Biederman  wrote:
>
> Basically with no patches that is where Linus's kernel is.
>
> User complained about the new thread showing up in ps.

Well, not only that, but it actively broke existing workflows for
managing things. Showing up in 'ps' wasn't just some purely cosmetic
issue, but had semantic meaning.

And honestly, I think the core issue is that we should just make this
work. Kernel threads randomly switching to user memory threads was
wrong, so CLONE_VM is absolutely the right thing to do.

But while "CLONE_VM without real threading" is a very traditional
thing in Linux - it was the original model for clone(), after all - I
don't believe it is the *correct* model. There was a very real reason
clone() has grown CLONE_THREAD and friends.

So honestly, I really think we want to complete the vhost move to
CLONE_THREAD (and thus CLONE_SIGNAL).

Not because the old kthread model didn't _work_, but because it's
really really wrong to try to randomly take on user-space attributes
at run-time.

And once you do the "user threads in kernel space" model, at that
point you really do want to act like a proper thread. Both because of
that 'ps' issue (which is really just "show the world what your
relationship is), but simply because that is the modern threading
model that we use for everything else, and special cases are bad.

So I'd really like to finish this. Even if we end up with a hack or
two in signal handling that we can hopefully fix up later by having
vhost fix up some of its current assumptions.

It has worked wonderfully well for io_uring - but we *did* have quite
a bit of conversion patches over some time as people found issues.
Which is why I don't expect the vhost conversion to be entirely
pain-free either, and I don't think we necessarily have to get to a
"perfect and clean" state immediately, just a "working and
conceptually in the right direction" state.

 Linus
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH 3/3] fork, vhost: Use CLONE_THREAD to fix freezer/ps regression

2023-05-25 Thread Mike Christie
On 5/23/23 7:15 AM, Oleg Nesterov wrote:
> 
> Now the main question. Whatever we do, SIGKILL/SIGSTOP/etc can come right
> before we call work->fn(). Is it "safe" to run this callback with
> signal_pending() or fatal_signal_pending() ?

The questions before this one I'll leave for the core vhost devs since
they know best.

For this question and the one below, when we get SIGKILL we think it's ok
to fail the operation as in it's ok to not execute it like normal (send
it down to the net/target/scsi/block layers for execution). However, we
need to do some processing of the work to release mem, refcounts, etc.

For example we use the vhost_task to handle completions from the layers
we interact with. So when we get a SIGKILL, we could have N commands in
the target/block/scsi/nvme layers below the vhost layer. When those
complete, the current code assumes we have the vhost_task to handle the
responses. So for pending works on that work_list, we can pretty easily
kill them. However, we don't have a way to kill those outstanding
requests to some other layer, so we have to wait and handle them
somewhere.

If you are saying that once we get SIGKILL then we just can't use the
task anymore and we have to drop down to workqueue/kthread or change up
the completion paths so they can execute in the context they are completed
from by lower levels, then I can code this. On the vhost side, it's just
really ugly vs the code we have now that used to use kthreads (or in
6.4-rc looks like a process) so we couldn't get signals and just had the
one code path that removes us.

From the vhost point of view signals are not useful to us and it's just
adding complexity for something that I don't think is useful to users.
However, after discussing this with you guys for a week and going over the
signal code, I can see your points of view where you guys are thinking it's
ugly for the signal code to try and support what we want :)



> 
> 
> Finally. I never looked into drivers/vhost/ before so I don't understand
> this code at all, but let me ask anyway... Can we change vhost_dev_flush()
> to run the pending callbacks rather than wait for vhost_worker() ?
> I guess we can't, ->mm won't be correct, but can you confirm?
> 
> Oleg.
> 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 1/3] vhost-scsi: Fix alignment handling with windows

2023-05-25 Thread Mike Christie
On 5/25/23 2:57 AM, Michael S. Tsirkin wrote:
> On Wed, May 24, 2023 at 06:34:05PM -0500, Mike Christie wrote:
>> The linux block layer requires bios/requests to have lengths with a 512
>> byte alignment. Some drivers/layers like dm-crypt and the direct IO code
>> will test for it and just fail. Other drivers like SCSI just assume the
>> requirement is met and will end up in infinite retry loops. The problem
>> for drivers like SCSI is that it uses functions like blk_rq_cur_sectors
>> and blk_rq_sectors which divide the request's length by 512. If there are
>> leftovers then they just get dropped. But other code in the block/scsi
>> layer may use blk_rq_bytes/blk_rq_cur_bytes and end up thinking there is
>> still data left and try to retry the cmd. We can then end up getting
>> stuck in retry loops where part of the block/scsi layer thinks there is data
>> left, but other parts think we want to do IOs of zero length.
>>
>> Linux will always check for alignment, but windows will not. When
>> vhost-scsi then translates the iovec it gets from a windows guest to a
>> scatterlist, we can end up with sg items where the sg->length is not
>> divisible by 512 due to the misaligned offset:
>>
>> sg[0].offset = 255;
>> sg[0].length = 3841;
>> sg...
>> sg[N].offset = 0;
>> sg[N].length = 255;
>>
>> When the lio backends then convert the SG to bios or other iovecs, we
>> end up sending them with the same misaligned values and can hit the
>> issues above.
>>
>> This just has us drop down to allocating a temp page and copying the data
>> when this happens. Because this adds a check that needs to loop over the
>> iovec in the main IO path, this patch adds an attribute that can be turned
>> on for just windows.
>>
>> Signed-off-by: Mike Christie 
> 
> I am assuming this triggers rarely, yes?
> 
> If so I would like to find a way to avoid setting an attribute.
> 
> I am guessing the main cost is an extra scan of iov.

The scan and a memory allocation to dup the iter. However, I see
iov_iter_revert and I think that might be what I needed before to
avoid the mem alloc so will try it out.


> vhost_scsi_iov_to_sgl already does a scan, how about changing
> it to fail if misaligned?
> We can then detect the relevant error code and try with a copy.
> 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 3/3] fork, vhost: Use CLONE_THREAD to fix freezer/ps regression

2023-05-25 Thread Eric W. Biederman
Oleg Nesterov  writes:

> On 05/24, Eric W. Biederman wrote:
>>
>> Oleg Nesterov  writes:
>>
>> > Yes, but probably SIGABRT/exit doesn't really differ from SIGKILL wrt
>> > vhost_worker().
>>
>> Actually I think it reveals that exiting with SIGABRT will cause
>> a deadlock.
>>
>> coredump_wait will wait for all of the threads to reach
>> coredump_task_exit.  Meanwhile vhost_worker is waiting for
>> all of the other threads to reach exit_files to close their
>> file descriptors.
>
> Indeed, I didn't think about this.
>
>
> So why do we actually need CLONE_THREAD ? Can't vhost_worker() be a kernel thread?
>
> kthread_create() won't be convenient, but how about kernel_thread() ? it inherits
> mm/cgroups/rlimits/etc, kthread_stop() should work just fine.

Basically with no patches that is where Linus's kernel is.

User complained about the new thread showing up in ps.  So the
exploration of how we could use CLONE_THREAD instead of what is
currently merged began.

Eric
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v3] virtio: Add missing documentation for structure fields

2023-05-25 Thread Stefano Garzarella

On Thu, May 25, 2023 at 04:35:42PM +0200, Simon Horman wrote:

Add missing documentation for the vqs_list_lock field of struct virtio_device,
and the validate field of struct virtio_driver.

./scripts/kernel-doc says:

.../virtio.h:131: warning: Function parameter or member 'vqs_list_lock' not described in 'virtio_device'
.../virtio.h:192: warning: Function parameter or member 'validate' not described in 'virtio_driver'
2 warnings as Errors

No functional changes intended.

Signed-off-by: Simon Horman 
---
Changes in v3:
- As suggested by Stefano Garzarella
 + Drop inline comment for @vqs_list_lock which is now covered by Kdoc
 + Add "Returns 0 or -errno." to @validate Kdoc
- Link to v2: https://lore.kernel.org/r/20230510-virtio-kdoc-v2-1-1c5a20eb4...@kernel.org

Changes in v2:
- As suggested by Michael S. Tsirkin
 + @validate is not called on probe
 + @validate does validates config space
 + embarrassingly, validate was misspelt
- Link to v1: https://lore.kernel.org/r/20230510-virtio-kdoc-v1-1-d2b1824a9...@kernel.org
---
include/linux/virtio.h | 5 -
1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index b93238db94e3..de6041deee37 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -103,6 +103,7 @@ int virtqueue_resize(struct virtqueue *vq, u32 num,
 * @config_enabled: configuration change reporting enabled
 * @config_change_pending: configuration change reported while disabled
 * @config_lock: protects configuration change reporting
+ * @vqs_list_lock: protects @vqs.
 * @dev: underlying device.
 * @id: the device type identification (used to match it with a driver).
 * @config: the configuration ops for this device.
@@ -117,7 +118,7 @@ struct virtio_device {
bool config_enabled;
bool config_change_pending;
spinlock_t config_lock;
-   spinlock_t vqs_list_lock; /* Protects VQs list access */
+   spinlock_t vqs_list_lock;
struct device dev;
struct virtio_device_id id;
const struct virtio_config_ops *config;
@@ -160,6 +161,8 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev);
 * @feature_table_size: number of entries in the feature table array.
 * @feature_table_legacy: same as feature_table but when working in legacy mode.
 * @feature_table_size_legacy: number of entries in feature table legacy array.
+ * @validate: the function to call to validate features and config space.
+ *            Returns 0 or -errno.
 * @probe: the function to call when a device is found.  Returns 0 or -errno.
 * @scan: optional function to call after successful probe; intended
 *        for virtio-scsi to invoke a scan.



LGTM!

Reviewed-by: Stefano Garzarella 

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 3/3] fork, vhost: Use CLONE_THREAD to fix freezer/ps regression

2023-05-25 Thread Oleg Nesterov
On 05/24, Eric W. Biederman wrote:
>
> Oleg Nesterov  writes:
>
> > Yes, but probably SIGABRT/exit doesn't really differ from SIGKILL wrt
> > vhost_worker().
>
> Actually I think it reveals that exiting with SIGABRT will cause
> a deadlock.
>
> coredump_wait will wait for all of the threads to reach
> coredump_task_exit.  Meanwhile vhost_worker is waiting for
> all of the other threads to reach exit_files to close their
> file descriptors.

Indeed, I didn't think about this.


So why do we actually need CLONE_THREAD ? Can't vhost_worker() be a kernel thread?

kthread_create() won't be convenient, but how about kernel_thread() ? it inherits
mm/cgroups/rlimits/etc, kthread_stop() should work just fine.

Oleg.

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH V2 4/5] vDPA/ifcvf: synchronize irqs in the reset routine

2023-05-25 Thread Zhu, Lingshan



On 5/24/2023 4:03 PM, Jason Wang wrote:

On Mon, May 8, 2023 at 6:05 PM Zhu Lingshan  wrote:

This commit synchronizes irqs of the virtqueues
and config space in the reset routine.
Thus ifcvf_stop_hw() and reset() are refactored as well.

Signed-off-by: Zhu Lingshan 
---
  drivers/vdpa/ifcvf/ifcvf_base.c | 41 +
  drivers/vdpa/ifcvf/ifcvf_base.h |  1 +
  drivers/vdpa/ifcvf/ifcvf_main.c | 46 +
  3 files changed, 38 insertions(+), 50 deletions(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c
index 79e313c5e10e..1f39290baa38 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.c
+++ b/drivers/vdpa/ifcvf/ifcvf_base.c
@@ -170,12 +170,9 @@ void ifcvf_set_status(struct ifcvf_hw *hw, u8 status)

  void ifcvf_reset(struct ifcvf_hw *hw)
  {
-   hw->config_cb.callback = NULL;
-   hw->config_cb.private = NULL;
-
 ifcvf_set_status(hw, 0);
-   /* flush set_status, make sure VF is stopped, reset */
-   ifcvf_get_status(hw);
+   while (ifcvf_get_status(hw))
+   msleep(1);
  }

  u64 ifcvf_get_hw_features(struct ifcvf_hw *hw)
@@ -368,20 +365,42 @@ void ifcvf_set_vq_ready(struct ifcvf_hw *hw, u16 qid, bool ready)
  vp_iowrite16(ready, &cfg->queue_enable);
  }

-static void ifcvf_hw_disable(struct ifcvf_hw *hw)
+static void ifcvf_reset_vring(struct ifcvf_hw *hw)
  {
-   u32 i;
+   u16 qid;
+
+   for (qid = 0; qid < hw->nr_vring; qid++) {
+   hw->vring[qid].cb.callback = NULL;
+   hw->vring[qid].cb.private = NULL;
+   ifcvf_set_vq_vector(hw, qid, VIRTIO_MSI_NO_VECTOR);
+   }
+}

+static void ifcvf_reset_config_handler(struct ifcvf_hw *hw)
+{
+   hw->config_cb.callback = NULL;
+   hw->config_cb.private = NULL;
 ifcvf_set_config_vector(hw, VIRTIO_MSI_NO_VECTOR);
-   for (i = 0; i < hw->nr_vring; i++) {
-   ifcvf_set_vq_vector(hw, i, VIRTIO_MSI_NO_VECTOR);
+}
+
+static void ifcvf_synchronize_irq(struct ifcvf_hw *hw)
+{
+   u32 nvectors = hw->num_msix_vectors;
+   struct pci_dev *pdev = hw->pdev;
+   int i, irq;
+
+   for (i = 0; i < nvectors; i++) {
+   irq = pci_irq_vector(pdev, i);
+   if (irq >= 0)
+   synchronize_irq(irq);
 }
  }

  void ifcvf_stop_hw(struct ifcvf_hw *hw)
  {
-   ifcvf_hw_disable(hw);
-   ifcvf_reset(hw);
+   ifcvf_synchronize_irq(hw);
+   ifcvf_reset_vring(hw);
+   ifcvf_reset_config_handler(hw);

Nit:

So the name of this function is kind of misleading since irq
synchronization and the virtqueue/config handlers do not belong to
hardware?

Maybe it would be better to call it ifcvf_stop().

Sure, I will send a V3 with this renaming,
do you ack patch 1/5?

Thanks
Zhu Lingshan


Thanks


  }

  void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid)
diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
index d34d3bc0dbf4..7430f80779be 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.h
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -82,6 +82,7 @@ struct ifcvf_hw {
 int vqs_reused_irq;
 u16 nr_vring;
 /* VIRTIO_PCI_CAP_DEVICE_CFG size */
+   u32 num_msix_vectors;
 u32 cap_dev_config_size;
 struct pci_dev *pdev;
  };
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 968687159e44..3401b9901dd2 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -125,6 +125,7 @@ static void ifcvf_free_irq(struct ifcvf_hw *vf)
 ifcvf_free_vq_irq(vf);
 ifcvf_free_config_irq(vf);
 ifcvf_free_irq_vectors(pdev);
+   vf->num_msix_vectors = 0;
  }

  /* ifcvf MSIX vectors allocator, this helper tries to allocate
@@ -343,36 +344,11 @@ static int ifcvf_request_irq(struct ifcvf_hw *vf)
 if (ret)
 return ret;

-   return 0;
-}
-
-static int ifcvf_stop_datapath(struct ifcvf_adapter *adapter)
-{
-   struct ifcvf_hw *vf = adapter->vf;
-   int i;
-
-   for (i = 0; i < vf->nr_vring; i++)
-   vf->vring[i].cb.callback = NULL;
-
-   ifcvf_stop_hw(vf);
+   vf->num_msix_vectors = nvectors;

 return 0;
  }

-static void ifcvf_reset_vring(struct ifcvf_adapter *adapter)
-{
-   struct ifcvf_hw *vf = adapter->vf;
-   int i;
-
-   for (i = 0; i < vf->nr_vring; i++) {
-   vf->vring[i].last_avail_idx = 0;
-   vf->vring[i].cb.callback = NULL;
-   vf->vring[i].cb.private = NULL;
-   }
-
-   ifcvf_reset(vf);
-}
-
  static struct ifcvf_adapter *vdpa_to_adapter(struct vdpa_device *vdpa_dev)
  {
 return container_of(vdpa_dev, struct ifcvf_adapter, vdpa);
@@ -462,23 +438,15 @@ static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)

  static int ifcvf_vdpa_reset(struct vdpa_device *vdpa_dev)
  {
-   struct ifcvf_adapter *adapter;
-   struct ifcvf_hw *vf;
-   u8 

Re: [PATCH 0/3] vhost-scsi: Fix IO hangs when using windows

2023-05-25 Thread Michael S. Tsirkin
On Wed, May 24, 2023 at 06:34:04PM -0500, Mike Christie wrote:
> The following patches were made over Linus's tree and fix an issue
> where windows guests will send iovecs with offset/lengths that result
> in IOs that are not aligned to 512. The LIO layer will then send them
> to Linux's block layer but it requires 512 byte alignment, so depending
> on the block driver being used we will get IO errors or hung IO.
> 
> The following patches have vhost-scsi detect when windows sends these
> IOs and copy them to a bounce buffer. It then does some cleanup in
> the related code.

Normally with virtio we'd have a feature bit that allows skipping alignment
checks. Teach linux to set it. Stefan, WDYT?

-- 
MST

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 1/3] vhost-scsi: Fix alignment handling with windows

2023-05-25 Thread Michael S. Tsirkin
On Wed, May 24, 2023 at 06:34:05PM -0500, Mike Christie wrote:
> The linux block layer requires bios/requests to have lengths with a 512
> byte alignment. Some drivers/layers like dm-crypt and the direct IO code
> will test for it and just fail. Other drivers like SCSI just assume the
> requirement is met and will end up in infinite retry loops. The problem
> for drivers like SCSI is that it uses functions like blk_rq_cur_sectors
> and blk_rq_sectors which divide the request's length by 512. If there are
> leftovers then they just get dropped. But other code in the block/scsi
> layer may use blk_rq_bytes/blk_rq_cur_bytes and end up thinking there is
> still data left and try to retry the cmd. We can then end up getting
> stuck in retry loops where part of the block/scsi layer thinks there is data
> left, but other parts think we want to do IOs of zero length.
> 
> Linux will always check for alignment, but windows will not. When
> vhost-scsi then translates the iovec it gets from a windows guest to a
> scatterlist, we can end up with sg items where the sg->length is not
> divisible by 512 due to the misaligned offset:
> 
> sg[0].offset = 255;
> sg[0].length = 3841;
> sg...
> sg[N].offset = 0;
> sg[N].length = 255;
> 
> When the lio backends then convert the SG to bios or other iovecs, we
> end up sending them with the same misaligned values and can hit the
> issues above.
> 
> This just has us drop down to allocating a temp page and copying the data
> when this happens. Because this adds a check that needs to loop over the
> iovec in the main IO path, this patch adds an attribute that can be turned
> on for just windows.
> 
> Signed-off-by: Mike Christie 

I am assuming this triggers rarely, yes?

If so I would like to find a way to avoid setting an attribute.

I am guessing the main cost is an extra scan of iov.
vhost_scsi_iov_to_sgl already does a scan, how about changing
it to fail if misaligned?
We can then detect the relevant error code and try with a copy.

WDYT?
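
A rough sketch of that alternative, with hypothetical names (the check and
the fallback helper are illustrative, not existing code):

  /* In vhost_scsi_iov_to_sgl(): reject buffers whose length is not a
   * multiple of 512 so the caller can fall back to a bounce-buffer copy.
   */
  if (len & 511)
          return -EINVAL;

  /* Caller side: retry the translation with a copy on misalignment. */
  ret = vhost_scsi_iov_to_sgl(cmd, write, &iter, sg, sg_count);
  if (ret == -EINVAL)
          ret = vhost_scsi_copy_iov_to_sgl(cmd, &iter, sg, sg_count);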

> ---
>  drivers/vhost/scsi.c | 174 +++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 151 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
> index bb10fa4bb4f6..dbad8fb579eb 100644
> --- a/drivers/vhost/scsi.c
> +++ b/drivers/vhost/scsi.c
> @@ -25,6 +25,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -75,6 +77,9 @@ struct vhost_scsi_cmd {
>   u32 tvc_prot_sgl_count;
>   /* Saved unpacked SCSI LUN for vhost_scsi_target_queue_cmd() */
>   u32 tvc_lun;
> + u32 copied_iov:1;
> + const void *saved_iter_addr;
> + struct iov_iter saved_iter;
>   /* Pointer to the SGL formatted memory from virtio-scsi */
>   struct scatterlist *tvc_sgl;
>   struct scatterlist *tvc_prot_sgl;
> @@ -107,6 +112,7 @@ struct vhost_scsi_nexus {
>  struct vhost_scsi_tpg {
>   /* Vhost port target portal group tag for TCM */
>   u16 tport_tpgt;
> + bool check_io_alignment;
>   /* Used to track number of TPG Port/Lun Links wrt to explict I_T Nexus 
> shutdown */
>   /* Used to track number of TPG Port/Lun Links wrt to explict I_T Nexus shutdown */
> tv_tpg_mutex */
>   /* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */
>   int i;
>  
>   if (tv_cmd->tvc_sgl_count) {
> - for (i = 0; i < tv_cmd->tvc_sgl_count; i++)
> - put_page(sg_page(_cmd->tvc_sgl[i]));
> + for (i = 0; i < tv_cmd->tvc_sgl_count; i++) {
> + if (tv_cmd->copied_iov)
> + __free_page(sg_page(_cmd->tvc_sgl[i]));
> + else
> + put_page(sg_page(_cmd->tvc_sgl[i]));
> + }
> + kfree(tv_cmd->saved_iter_addr);
>   }
>   if (tv_cmd->tvc_prot_sgl_count) {
>   for (i = 0; i < tv_cmd->tvc_prot_sgl_count; i++)
> @@ -502,6 +513,27 @@ static void vhost_scsi_evt_work(struct vhost_work *work)
>   mutex_unlock(>mutex);
>  }
>  
> +static int vhost_scsi_copy_sgl_to_iov(struct vhost_scsi_cmd *cmd)
> +{
> + struct iov_iter *iter = >saved_iter;
> + struct scatterlist *sg = cmd->tvc_sgl;
> + struct page *page;
> + size_t len;
> + int i;
> +
> + for (i = 0; i < cmd->tvc_sgl_count; i++) {
> + page = sg_page(&sg[i]);
> + len = sg[i].length;
> +
> + if (copy_page_to_iter(page, 0, len, iter) != len) {
> + pr_err("Could not copy data. Error %lu\n", len);
> + return -1;
> + }
> + }
> +
> + return 0;
> +}
> +
>  /* Fill in status and signal that we are done processing this command
>   *
>   * This is scheduled in the vhost work queue so we are called with the owner
> @@ -525,15 +557,20 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
>  
>   pr_debug("%s tv_cmd %p resid %u status 

Re: [PATCH] tools/virtio: Add .gitignore to ringtest

2023-05-25 Thread Michael S. Tsirkin


Subject should be "for ringtest" not "to ringtest".

On Wed, May 24, 2023 at 08:36:12PM +0800, Rong Tao wrote:
> From: Rong Tao 
> 
> Ignore executions for ringtest.
> 
> Signed-off-by: Rong Tao 
> ---
>  tools/virtio/ringtest/.gitignore | 7 +++
>  1 file changed, 7 insertions(+)
>  create mode 100644 tools/virtio/ringtest/.gitignore
> 
> diff --git a/tools/virtio/ringtest/.gitignore b/tools/virtio/ringtest/.gitignore
> new file mode 100644
> index 000000000000..100b9e30c0f4
> --- /dev/null
> +++ b/tools/virtio/ringtest/.gitignore
> @@ -0,0 +1,7 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +/noring
> +/ptr_ring
> +/ring
> +/virtio_ring_0_9
> +/virtio_ring_inorder
> +/virtio_ring_poll
> -- 
> 2.39.1
> 



Re: [PATCH] tools/virtio: Add .gitignore to ringtest

2023-05-25 Thread Michael S. Tsirkin
On Wed, May 24, 2023 at 08:36:12PM +0800, Rong Tao wrote:
> From: Rong Tao 
> 
> Ignore executions for ringtest.

I think you mean "executables".

> 
> Signed-off-by: Rong Tao 
> ---
>  tools/virtio/ringtest/.gitignore | 7 +++
>  1 file changed, 7 insertions(+)
>  create mode 100644 tools/virtio/ringtest/.gitignore
> 
> diff --git a/tools/virtio/ringtest/.gitignore b/tools/virtio/ringtest/.gitignore
> new file mode 100644
> index 000000000000..100b9e30c0f4
> --- /dev/null
> +++ b/tools/virtio/ringtest/.gitignore
> @@ -0,0 +1,7 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +/noring
> +/ptr_ring
> +/ring
> +/virtio_ring_0_9
> +/virtio_ring_inorder
> +/virtio_ring_poll
> -- 
> 2.39.1
> 



Re: [PATCH] tools/virtio: Add .gitignore to ringtest

2023-05-25 Thread Michael S. Tsirkin
On Wed, May 24, 2023 at 08:36:12PM +0800, Rong Tao wrote:
> From: Rong Tao 
> 
> Ignore executions for ringtest.
> 
> Signed-off-by: Rong Tao 
> ---
>  tools/virtio/ringtest/.gitignore | 7 +++
>  1 file changed, 7 insertions(+)
>  create mode 100644 tools/virtio/ringtest/.gitignore
> 
> diff --git a/tools/virtio/ringtest/.gitignore b/tools/virtio/ringtest/.gitignore
> new file mode 100644
> index 000000000000..100b9e30c0f4
> --- /dev/null
> +++ b/tools/virtio/ringtest/.gitignore
> @@ -0,0 +1,7 @@
> +# SPDX-License-Identifier: GPL-2.0-only

This one, with the SPDX tag, seems better.

> +/noring
> +/ptr_ring
> +/ring
> +/virtio_ring_0_9
> +/virtio_ring_inorder
> +/virtio_ring_poll
> -- 
> 2.39.1
> 



Re: [PATCH V3 net-next 1/2] virtio-net: convert rx mode setting to use workqueue

2023-05-25 Thread Michael S. Tsirkin
On Thu, May 25, 2023 at 11:43:34AM +0800, Jason Wang wrote:
> On Wed, May 24, 2023 at 5:15 PM Michael S. Tsirkin  wrote:
> >
> > On Wed, May 24, 2023 at 04:18:41PM +0800, Jason Wang wrote:
> > > This patch converts rx mode setting to be done in a workqueue; this is
> > > a must to allow sleeping while waiting for the cvq command to
> > > complete, since the current code is executed under the addr spin lock.
> > >
> > > Signed-off-by: Jason Wang 
> > > ---
> > > Changes since V1:
> > > - use RTNL to synchronize rx mode worker
> > > ---
> > >  drivers/net/virtio_net.c | 55 +---
> > >  1 file changed, 52 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index 56ca1d270304..5d2f1da4eaa0 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -265,6 +265,12 @@ struct virtnet_info {
> > >   /* Work struct for config space updates */
> > >   struct work_struct config_work;
> > >
> > > + /* Work struct for config rx mode */
> >
> > With a bit less abbreviation maybe? setting rx mode?
> 
> That's fine.
> 
> >
> > > + struct work_struct rx_mode_work;
> > > +
> > > + /* Is rx mode work enabled? */
> >
> > Ugh not a great comment.
> 
> Any suggestions for this? E.g. we had:
> 
> /* Is delayed refill enabled? */

/* OK to queue work setting RX mode? */
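
For readers following along: the guard flag is needed because
->ndo_set_rx_mode is called in atomic context (under netif_addr_lock_bh),
so all it may do is queue the work, while teardown has to stop new work
from being queued before it drains what is in flight. A minimal
stand-alone sketch of that pattern (illustrative names only, not the
actual patch; the real series presumably flips the flag around
freeze/remove):

	#include <linux/workqueue.h>
	#include <linux/rtnetlink.h>

	struct demo_priv {
		struct work_struct rx_mode_work;
		bool rx_mode_work_enabled;	/* written under RTNL */
	};

	/* Atomic context: only queue the work, never sleep here. */
	static void demo_set_rx_mode(struct demo_priv *p)
	{
		if (p->rx_mode_work_enabled)
			schedule_work(&p->rx_mode_work);
	}

	static void demo_teardown(struct demo_priv *p)
	{
		rtnl_lock();
		p->rx_mode_work_enabled = false;	/* no new work */
		rtnl_unlock();
		flush_work(&p->rx_mode_work);		/* drain in-flight work */
	}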


> >
> > > + bool rx_mode_work_enabled;
> > > +
> >
> >
> >
> > >   /* Does the affinity hint is set for virtqueues? */
> > >   bool affinity_hint_set;
> > >
> > > @@ -388,6 +394,20 @@ static void disable_delayed_refill(struct virtnet_info *vi)
> > >   spin_unlock_bh(&vi->refill_lock);
> > >  }
> > >
> > > +static void enable_rx_mode_work(struct virtnet_info *vi)
> > > +{
> > > + rtnl_lock();
> > > + vi->rx_mode_work_enabled = true;
> > > + rtnl_unlock();
> > > +}
> > > +
> > > +static void disable_rx_mode_work(struct virtnet_info *vi)
> > > +{
> > > + rtnl_lock();
> > > + vi->rx_mode_work_enabled = false;
> > > + rtnl_unlock();
> > > +}
> > > +
> > >  static void virtqueue_napi_schedule(struct napi_struct *napi,
> > >   struct virtqueue *vq)
> > >  {
> > > @@ -2341,9 +2361,11 @@ static int virtnet_close(struct net_device *dev)
> > >   return 0;
> > >  }
> > >
> > > -static void virtnet_set_rx_mode(struct net_device *dev)
> > > +static void virtnet_rx_mode_work(struct work_struct *work)
> > >  {
> > > - struct virtnet_info *vi = netdev_priv(dev);
> > > + struct virtnet_info *vi =
> > > + container_of(work, struct virtnet_info, rx_mode_work);
> > > + struct net_device *dev = vi->dev;
> > >   struct scatterlist sg[2];
> > >   struct virtio_net_ctrl_mac *mac_data;
> > >   struct netdev_hw_addr *ha;
> > > @@ -2356,6 +2378,8 @@ static void virtnet_set_rx_mode(struct net_device *dev)
> > >   if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
> > >   return;
> > >
> > > + rtnl_lock();
> > > +
> > >   vi->ctrl->promisc = ((dev->flags & IFF_PROMISC) != 0);
> > >   vi->ctrl->allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
> > >
> > > @@ -2373,14 +2397,19 @@ static void virtnet_set_rx_mode(struct net_device *dev)
> > >   dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
> > >            vi->ctrl->allmulti ? "en" : "dis");
> > >
> > > + netif_addr_lock_bh(dev);
> > > +
> > >   uc_count = netdev_uc_count(dev);
> > >   mc_count = netdev_mc_count(dev);
> > >   /* MAC filter - use one buffer for both lists */
> > >   buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
> > > (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
> > >   mac_data = buf;
> > > - if (!buf)
> > > + if (!buf) {
> > > + netif_addr_unlock_bh(dev);
> > > + rtnl_unlock();
> > >   return;
> > > + }
> > >
> > >   sg_init_table(sg, 2);
> > >
> > > @@ -2401,6 +2430,8 @@ static void virtnet_set_rx_mode(struct net_device *dev)
> > >   netdev_for_each_mc_addr(ha, dev)
> > >   memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
> > >
> > > + netif_addr_unlock_bh(dev);
> > > +
> > >   sg_set_buf(&sg[1], mac_data,
> > >  sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
> > >
> > > @@ -2408,9 +2439,19 @@ static void virtnet_set_rx_mode(struct net_device *dev)
> > > VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
> > >   dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
> > >
> > > + rtnl_unlock();
> > > +
> > >   kfree(buf);
> > >  }
> > >
> > > +static void virtnet_set_rx_mode(struct net_device *dev)
> > > +{
> > > + struct virtnet_info *vi = netdev_priv(dev);
> > > +
> > > + if (vi->rx_mode_work_enabled)
> > > + schedule_work(&vi->rx_mode_work);
> > > +}
> > > +
> >
> > >  static int