[PATCH 5.16 140/285] virtio_console: eliminate anonymous module_init & module_exit

2022-04-12 Thread Greg Kroah-Hartman
From: Randy Dunlap 

[ Upstream commit fefb8a2a941338d871e2d83fbd65fbfa068857bd ]

Eliminate anonymous module_init() and module_exit(), which can lead to
confusion or ambiguity when reading System.map, crashes/oops/bugs,
or an initcall_debug log.

Give each of these init and exit functions unique driver-specific
names to eliminate the anonymous names.

Example 1: (System.map)
 832fc78c t init
 832fc79e t init
 832fc8f8 t init

Example 2: (initcall_debug log)
 calling  init+0x0/0x12 @ 1
 initcall init+0x0/0x12 returned 0 after 15 usecs
 calling  init+0x0/0x60 @ 1
 initcall init+0x0/0x60 returned 0 after 2 usecs
 calling  init+0x0/0x9a @ 1
 initcall init+0x0/0x9a returned 0 after 74 usecs

Signed-off-by: Randy Dunlap 
Reviewed-by: Amit Shah 
Cc: virtualization@lists.linux-foundation.org
Cc: Arnd Bergmann 
Link: https://lore.kernel.org/r/20220316192010.19001-3-rdun...@infradead.org
Signed-off-by: Greg Kroah-Hartman 
Signed-off-by: Sasha Levin 
---
 drivers/char/virtio_console.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index f864b17be7e3..35025f283bf6 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -2245,7 +2245,7 @@ static struct virtio_driver virtio_rproc_serial = {
.remove =   virtcons_remove,
 };
 
-static int __init init(void)
+static int __init virtio_console_init(void)
 {
int err;
 
@@ -2280,7 +2280,7 @@ static int __init init(void)
return err;
 }
 
-static void __exit fini(void)
+static void __exit virtio_console_fini(void)
 {
reclaim_dma_bufs();
 
@@ -2290,8 +2290,8 @@ static void __exit fini(void)
class_destroy(pdrvdata.class);
debugfs_remove_recursive(pdrvdata.debugfs_dir);
 }
-module_init(init);
-module_exit(fini);
+module_init(virtio_console_init);
+module_exit(virtio_console_fini);
 
 MODULE_DESCRIPTION("Virtio console driver");
 MODULE_LICENSE("GPL");
-- 
2.35.1



___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v9 23/32] virtio_pci: queue_reset: support VIRTIO_F_RING_RESET

2022-04-12 Thread Jason Wang


在 2022/4/6 上午11:43, Xuan Zhuo 写道:

This patch implements virtio pci support for QUEUE RESET.

Performing reset on a queue is divided into these steps:

  1. notify the device to reset the queue
  2. recycle the buffer submitted
  3. reset the vring (may re-alloc)
  4. mmap vring to device, and enable the queue

This patch implements virtio_reset_vq(), virtio_enable_resetq() in the
pci scenario.

Signed-off-by: Xuan Zhuo 
---
  drivers/virtio/virtio_pci_common.c |  8 +--
  drivers/virtio/virtio_pci_modern.c | 84 ++
  drivers/virtio/virtio_ring.c   |  2 +
  include/linux/virtio.h |  1 +
  4 files changed, 92 insertions(+), 3 deletions(-)

diff --git a/drivers/virtio/virtio_pci_common.c 
b/drivers/virtio/virtio_pci_common.c
index fdbde1db5ec5..863d3a8a0956 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -248,9 +248,11 @@ static void vp_del_vq(struct virtqueue *vq)
struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
unsigned long flags;
  
-	spin_lock_irqsave(&vp_dev->lock, flags);

-   list_del(&info->node);
-   spin_unlock_irqrestore(&vp_dev->lock, flags);
+   if (!vq->reset) {



On which condition that we may hit this path?



+   spin_lock_irqsave(&vp_dev->lock, flags);
+   list_del(&info->node);
+   spin_unlock_irqrestore(&vp_dev->lock, flags);
+   }
  
  	vp_dev->del_vq(info);

kfree(info);
diff --git a/drivers/virtio/virtio_pci_modern.c 
b/drivers/virtio/virtio_pci_modern.c
index 49a4493732cf..cb5d38f1c9c8 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -34,6 +34,9 @@ static void vp_transport_features(struct virtio_device *vdev, 
u64 features)
if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) &&
pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV))
__virtio_set_bit(vdev, VIRTIO_F_SR_IOV);
+
+   if (features & BIT_ULL(VIRTIO_F_RING_RESET))
+   __virtio_set_bit(vdev, VIRTIO_F_RING_RESET);
  }
  
  /* virtio config->finalize_features() implementation */

@@ -199,6 +202,83 @@ static int vp_active_vq(struct virtqueue *vq, u16 msix_vec)
return 0;
  }
  
+static int vp_modern_reset_vq(struct virtqueue *vq)

+{
+   struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
+   struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+   struct virtio_pci_vq_info *info;
+   unsigned long flags;
+
+   if (!virtio_has_feature(vq->vdev, VIRTIO_F_RING_RESET))
+   return -ENOENT;
+
+   vp_modern_set_queue_reset(mdev, vq->index);
+
+   info = vp_dev->vqs[vq->index];
+
+   /* delete vq from irq handler */
+   spin_lock_irqsave(&vp_dev->lock, flags);
+   list_del(&info->node);
+   spin_unlock_irqrestore(&vp_dev->lock, flags);
+
+   INIT_LIST_HEAD(&info->node);
+
+   /* For the case where vq has an exclusive irq, to prevent the irq from
+* being received again and the pending irq, call disable_irq().
+*
+* In the scenario based on shared interrupts, vq will be searched from
+* the queue virtqueues. Since the previous list_del() has been deleted
+* from the queue, it is impossible for vq to be called in this case.
+* There is no need to close the corresponding interrupt.
+*/
+   if (vp_dev->per_vq_vectors && info->msix_vector != VIRTIO_MSI_NO_VECTOR)
+   disable_irq(pci_irq_vector(vp_dev->pci_dev, info->msix_vector));



See the previous discussion and the revert of the first try to harden 
the interrupt. We probably can't use disable_irq() since it conflicts 
with the affinity managed IRQ that is used by some drivers.


We need to use synchonize_irq() and per virtqueue flag instead. As 
mentioned in previous patches, this could be done on top of my rework on 
the IRQ hardening .




+
+   vq->reset = true;
+
+   return 0;
+}
+
+static int vp_modern_enable_reset_vq(struct virtqueue *vq)
+{
+   struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
+   struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+   struct virtio_pci_vq_info *info;
+   unsigned long flags, index;
+   int err;
+
+   if (!vq->reset)
+   return -EBUSY;
+
+   index = vq->index;
+   info = vp_dev->vqs[index];
+
+   /* check queue reset status */
+   if (vp_modern_get_queue_reset(mdev, index) != 1)
+   return -EBUSY;
+
+   err = vp_active_vq(vq, info->msix_vector);
+   if (err)
+   return err;
+
+   if (vq->callback) {
+   spin_lock_irqsave(&vp_dev->lock, flags);
+   list_add(&info->node, &vp_dev->virtqueues);
+   spin_unlock_irqrestore(&vp_dev->lock, flags);
+   } else {
+   INIT_LIST_HEAD(&info->node);
+   }
+
+   vp_modern_set_queue_enable(&vp_dev->mdev, index, true);
+
+   if (

Re: [PATCH v9 24/32] virtio: find_vqs() add arg sizes

2022-04-12 Thread Jason Wang


在 2022/4/6 上午11:43, Xuan Zhuo 写道:

find_vqs() adds a new parameter sizes to specify the size of each vq
vring.

0 means use the maximum size supported by the backend.



Does this mean driver still need to prepare a array of 0 or it can 
simply pass NULL to find_vqs()?


Thanks




In the split scenario, the meaning of size is the largest size, because
it may be limited by memory, the virtio core will try a smaller size.
And the size is power of 2.

Signed-off-by: Xuan Zhuo 
Acked-by: Hans de Goede 
Reviewed-by: Mathieu Poirier 
---
  arch/um/drivers/virtio_uml.c |  2 +-
  drivers/platform/mellanox/mlxbf-tmfifo.c |  1 +
  drivers/remoteproc/remoteproc_virtio.c   |  1 +
  drivers/s390/virtio/virtio_ccw.c |  1 +
  drivers/virtio/virtio_mmio.c |  1 +
  drivers/virtio/virtio_pci_common.c   |  2 +-
  drivers/virtio/virtio_pci_common.h   |  2 +-
  drivers/virtio/virtio_pci_modern.c   |  7 +--
  drivers/virtio/virtio_vdpa.c |  1 +
  include/linux/virtio_config.h| 14 +-
  10 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
index 904993d15a85..6af98d130840 100644
--- a/arch/um/drivers/virtio_uml.c
+++ b/arch/um/drivers/virtio_uml.c
@@ -998,7 +998,7 @@ static struct virtqueue *vu_setup_vq(struct virtio_device 
*vdev,
  
  static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,

   struct virtqueue *vqs[], vq_callback_t *callbacks[],
-  const char * const names[], const bool *ctx,
+  const char * const names[], u32 sizes[], const bool *ctx,
   struct irq_affinity *desc)
  {
struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c 
b/drivers/platform/mellanox/mlxbf-tmfifo.c
index 1ae3c56b66b0..8be13d416f48 100644
--- a/drivers/platform/mellanox/mlxbf-tmfifo.c
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -928,6 +928,7 @@ static int mlxbf_tmfifo_virtio_find_vqs(struct 
virtio_device *vdev,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char * const names[],
+   u32 sizes[],
const bool *ctx,
struct irq_affinity *desc)
  {
diff --git a/drivers/remoteproc/remoteproc_virtio.c 
b/drivers/remoteproc/remoteproc_virtio.c
index 7611755d0ae2..baad31c9da45 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -158,6 +158,7 @@ static int rproc_virtio_find_vqs(struct virtio_device 
*vdev, unsigned int nvqs,
 struct virtqueue *vqs[],
 vq_callback_t *callbacks[],
 const char * const names[],
+u32 sizes[],
 const bool * ctx,
 struct irq_affinity *desc)
  {
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 468da60b56c5..f0c814a54e78 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -634,6 +634,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, 
unsigned nvqs,
   struct virtqueue *vqs[],
   vq_callback_t *callbacks[],
   const char * const names[],
+  u32 sizes[],
   const bool *ctx,
   struct irq_affinity *desc)
  {
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index a41abc8051b9..9d5a674bdeec 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -461,6 +461,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned 
nvqs,
   struct virtqueue *vqs[],
   vq_callback_t *callbacks[],
   const char * const names[],
+  u32 sizes[],
   const bool *ctx,
   struct irq_affinity *desc)
  {
diff --git a/drivers/virtio/virtio_pci_common.c 
b/drivers/virtio/virtio_pci_common.c
index 863d3a8a0956..826ea2e35d54 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -427,7 +427,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, 
unsigned nvqs,
  /* the config->find_vqs() implementation */
  int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[], vq_callback_t *callbacks[],
-   const char * const names[], const bool *ctx,
+   const char * const names[], u32 sizes[], const bool *ctx,
struct irq_affinity *d

Re: [PATCH v9 25/32] virtio_pci: support the arg sizes of find_vqs()

2022-04-12 Thread Jason Wang


在 2022/4/6 上午11:43, Xuan Zhuo 写道:

Virtio PCI supports new parameter sizes of find_vqs().

Signed-off-by: Xuan Zhuo 



Acked-by: Jason Wang 



---
  drivers/virtio/virtio_pci_common.c | 18 ++
  drivers/virtio/virtio_pci_common.h |  1 +
  drivers/virtio/virtio_pci_legacy.c |  6 +-
  drivers/virtio/virtio_pci_modern.c | 10 +++---
  4 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/drivers/virtio/virtio_pci_common.c 
b/drivers/virtio/virtio_pci_common.c
index 826ea2e35d54..23976c61583f 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -208,6 +208,7 @@ static int vp_request_msix_vectors(struct virtio_device 
*vdev, int nvectors,
  static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned 
index,
 void (*callback)(struct virtqueue *vq),
 const char *name,
+u32 size,
 bool ctx,
 u16 msix_vec)
  {
@@ -220,7 +221,7 @@ static struct virtqueue *vp_setup_vq(struct virtio_device 
*vdev, unsigned index,
if (!info)
return ERR_PTR(-ENOMEM);
  
-	vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,

+   vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, size, ctx,
  msix_vec);
if (IS_ERR(vq))
goto out_info;
@@ -314,7 +315,7 @@ void vp_del_vqs(struct virtio_device *vdev)
  
  static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,

struct virtqueue *vqs[], vq_callback_t *callbacks[],
-   const char * const names[], bool per_vq_vectors,
+   const char * const names[], u32 sizes[], bool per_vq_vectors,
const bool *ctx,
struct irq_affinity *desc)
  {
@@ -357,8 +358,8 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, 
unsigned nvqs,
else
msix_vec = VP_MSIX_VQ_VECTOR;
vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
-ctx ? ctx[i] : false,
-msix_vec);
+sizes ? sizes[i] : 0,
+ctx ? ctx[i] : false, msix_vec);
if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]);
goto error_find;
@@ -388,7 +389,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, 
unsigned nvqs,
  
  static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,

struct virtqueue *vqs[], vq_callback_t *callbacks[],
-   const char * const names[], const bool *ctx)
+   const char * const names[], u32 sizes[], const bool *ctx)
  {
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
int i, err, queue_idx = 0;
@@ -410,6 +411,7 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, 
unsigned nvqs,
continue;
}
vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
+sizes ? sizes[i] : 0,
 ctx ? ctx[i] : false,
 VIRTIO_MSI_NO_VECTOR);
if (IS_ERR(vqs[i])) {
@@ -433,15 +435,15 @@ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
int err;
  
  	/* Try MSI-X with one vector per queue. */

-   err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, ctx, 
desc);
+   err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, sizes, true, 
ctx, desc);
if (!err)
return 0;
/* Fallback: MSI-X with one vector for config, one shared for queues. */
-   err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, ctx, 
desc);
+   err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, sizes, false, 
ctx, desc);
if (!err)
return 0;
/* Finally fall back to regular interrupts. */
-   return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, ctx);
+   return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names, sizes, ctx);
  }
  
  const char *vp_bus_name(struct virtio_device *vdev)

diff --git a/drivers/virtio/virtio_pci_common.h 
b/drivers/virtio/virtio_pci_common.h
index 859eed559e10..fbf5a6d4b164 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -81,6 +81,7 @@ struct virtio_pci_device {
  unsigned idx,
  void (*callback)(struct virtqueue *vq),
  const char *name,
+ u32 size,
  bool ctx,
  u16 msix_vec);
void (*del_vq)

Re: [PATCH v9 26/32] virtio_mmio: support the arg sizes of find_vqs()

2022-04-12 Thread Jason Wang


在 2022/4/6 上午11:43, Xuan Zhuo 写道:

Virtio MMIO support the new parameter sizes of find_vqs().

Signed-off-by: Xuan Zhuo 
---



Acked-by: Jason Wang 



  drivers/virtio/virtio_mmio.c | 8 ++--
  1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 9d5a674bdeec..51cf51764a92 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -347,7 +347,7 @@ static void vm_del_vqs(struct virtio_device *vdev)
  
  static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,

  void (*callback)(struct virtqueue *vq),
- const char *name, bool ctx)
+ const char *name, u32 size, bool ctx)
  {
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
struct virtio_mmio_vq_info *info;
@@ -382,8 +382,11 @@ static struct virtqueue *vm_setup_vq(struct virtio_device 
*vdev, unsigned index,
goto error_new_virtqueue;
}
  
+	if (!size || size > num)

+   size = num;
+
/* Create the vring */
-   vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev,
+   vq = vring_create_virtqueue(index, size, VIRTIO_MMIO_VRING_ALIGN, vdev,
 true, true, ctx, vm_notify, callback, name);
if (!vq) {
err = -ENOMEM;
@@ -484,6 +487,7 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned 
nvqs,
}
  
  		vqs[i] = vm_setup_vq(vdev, queue_idx++, callbacks[i], names[i],

+sizes ? sizes[i] : 0,
 ctx ? ctx[i] : false);
if (IS_ERR(vqs[i])) {
vm_del_vqs(vdev);


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH 5.17 184/343] virtio_console: eliminate anonymous module_init & module_exit

2022-04-12 Thread Greg Kroah-Hartman
From: Randy Dunlap 

[ Upstream commit fefb8a2a941338d871e2d83fbd65fbfa068857bd ]

Eliminate anonymous module_init() and module_exit(), which can lead to
confusion or ambiguity when reading System.map, crashes/oops/bugs,
or an initcall_debug log.

Give each of these init and exit functions unique driver-specific
names to eliminate the anonymous names.

Example 1: (System.map)
 832fc78c t init
 832fc79e t init
 832fc8f8 t init

Example 2: (initcall_debug log)
 calling  init+0x0/0x12 @ 1
 initcall init+0x0/0x12 returned 0 after 15 usecs
 calling  init+0x0/0x60 @ 1
 initcall init+0x0/0x60 returned 0 after 2 usecs
 calling  init+0x0/0x9a @ 1
 initcall init+0x0/0x9a returned 0 after 74 usecs

Signed-off-by: Randy Dunlap 
Reviewed-by: Amit Shah 
Cc: virtualization@lists.linux-foundation.org
Cc: Arnd Bergmann 
Link: https://lore.kernel.org/r/20220316192010.19001-3-rdun...@infradead.org
Signed-off-by: Greg Kroah-Hartman 
Signed-off-by: Sasha Levin 
---
 drivers/char/virtio_console.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index e3c430539a17..9fa3c76a267f 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -2245,7 +2245,7 @@ static struct virtio_driver virtio_rproc_serial = {
.remove =   virtcons_remove,
 };
 
-static int __init init(void)
+static int __init virtio_console_init(void)
 {
int err;
 
@@ -2280,7 +2280,7 @@ static int __init init(void)
return err;
 }
 
-static void __exit fini(void)
+static void __exit virtio_console_fini(void)
 {
reclaim_dma_bufs();
 
@@ -2290,8 +2290,8 @@ static void __exit fini(void)
class_destroy(pdrvdata.class);
debugfs_remove_recursive(pdrvdata.debugfs_dir);
 }
-module_init(init);
-module_exit(fini);
+module_init(virtio_console_init);
+module_exit(virtio_console_fini);
 
 MODULE_DESCRIPTION("Virtio console driver");
 MODULE_LICENSE("GPL");
-- 
2.35.1



___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v9 27/32] virtio: add helper virtio_find_vqs_ctx_size()

2022-04-12 Thread Jason Wang


在 2022/4/6 上午11:43, Xuan Zhuo 写道:

Introduce helper virtio_find_vqs_ctx_size() to call find_vqs and specify
the maximum size of each vq ring.

Signed-off-by: Xuan Zhuo 



Acked-by: Jason Wang 



---
  include/linux/virtio_config.h | 12 
  1 file changed, 12 insertions(+)

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 0f7def7ddfd2..22e29c926946 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -235,6 +235,18 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, 
unsigned nvqs,
  ctx, desc);
  }
  
+static inline

+int virtio_find_vqs_ctx_size(struct virtio_device *vdev, u32 nvqs,
+struct virtqueue *vqs[],
+vq_callback_t *callbacks[],
+const char * const names[],
+u32 sizes[],
+const bool *ctx, struct irq_affinity *desc)
+{
+   return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, sizes,
+ ctx, desc);
+}
+
  /**
   * virtio_device_ready - enable vq use in probe function
   * @vdev: the device


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v9 28/32] virtio_net: set the default max ring size by find_vqs()

2022-04-12 Thread Jason Wang


在 2022/4/6 上午11:43, Xuan Zhuo 写道:

Use virtio_find_vqs_ctx_size() to specify the maximum ring size of tx,
rx at the same time.

  | rx/tx ring size
---
speed == UNKNOWN or < 10G| 1024
speed < 40G  | 4096
speed >= 40G | 8192

Call virtnet_update_settings() once before calling init_vqs() to update
speed.

Signed-off-by: Xuan Zhuo 
---



Acked-by: Jason Wang 



  drivers/net/virtio_net.c | 42 
  1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index a801ea40908f..dad497a47b3a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2861,6 +2861,29 @@ static unsigned int mergeable_min_buf_len(struct 
virtnet_info *vi, struct virtqu
   (unsigned int)GOOD_PACKET_LEN);
  }
  
+static void virtnet_config_sizes(struct virtnet_info *vi, u32 *sizes)

+{
+   u32 i, rx_size, tx_size;
+
+   if (vi->speed == SPEED_UNKNOWN || vi->speed < SPEED_1) {
+   rx_size = 1024;
+   tx_size = 1024;
+
+   } else if (vi->speed < SPEED_4) {
+   rx_size = 1024 * 4;
+   tx_size = 1024 * 4;
+
+   } else {
+   rx_size = 1024 * 8;
+   tx_size = 1024 * 8;
+   }
+
+   for (i = 0; i < vi->max_queue_pairs; i++) {
+   sizes[rxq2vq(i)] = rx_size;
+   sizes[txq2vq(i)] = tx_size;
+   }
+}
+
  static int virtnet_find_vqs(struct virtnet_info *vi)
  {
vq_callback_t **callbacks;
@@ -2868,6 +2891,7 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
int ret = -ENOMEM;
int i, total_vqs;
const char **names;
+   u32 *sizes;
bool *ctx;
  
  	/* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by

@@ -2895,10 +2919,15 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
ctx = NULL;
}
  
+	sizes = kmalloc_array(total_vqs, sizeof(*sizes), GFP_KERNEL);

+   if (!sizes)
+   goto err_sizes;
+
/* Parameters for control virtqueue, if any */
if (vi->has_cvq) {
callbacks[total_vqs - 1] = NULL;
names[total_vqs - 1] = "control";
+   sizes[total_vqs - 1] = 64;
}
  
  	/* Allocate/initialize parameters for send/receive virtqueues */

@@ -2913,8 +2942,10 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
ctx[rxq2vq(i)] = true;
}
  
-	ret = virtio_find_vqs_ctx(vi->vdev, total_vqs, vqs, callbacks,

- names, ctx, NULL);
+   virtnet_config_sizes(vi, sizes);
+
+   ret = virtio_find_vqs_ctx_size(vi->vdev, total_vqs, vqs, callbacks,
+  names, sizes, ctx, NULL);
if (ret)
goto err_find;
  
@@ -2934,6 +2965,8 @@ static int virtnet_find_vqs(struct virtnet_info *vi)
  
  
  err_find:

+   kfree(sizes);
+err_sizes:
kfree(ctx);
  err_ctx:
kfree(names);
@@ -3252,6 +3285,9 @@ static int virtnet_probe(struct virtio_device *vdev)
vi->curr_queue_pairs = num_online_cpus();
vi->max_queue_pairs = max_queue_pairs;
  
+	virtnet_init_settings(dev);

+   virtnet_update_settings(vi);
+
/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
err = init_vqs(vi);
if (err)
@@ -3264,8 +3300,6 @@ static int virtnet_probe(struct virtio_device *vdev)
netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
  
-	virtnet_init_settings(dev);

-
if (virtio_has_feature(vdev, VIRTIO_NET_F_STANDBY)) {
vi->failover = net_failover_create(vi->dev);
if (IS_ERR(vi->failover)) {


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v9 29/32] virtio_net: get ringparam by virtqueue_get_vring_max_size()

2022-04-12 Thread Jason Wang


在 2022/4/6 上午11:43, Xuan Zhuo 写道:

Use virtqueue_get_vring_max_size() in virtnet_get_ringparam() to set
tx,rx_max_pending.

Signed-off-by: Xuan Zhuo 
---



Acked-by: Jason Wang 



  drivers/net/virtio_net.c | 8 
  1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index dad497a47b3a..96d96c666c8c 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2177,10 +2177,10 @@ static void virtnet_get_ringparam(struct net_device 
*dev,
  {
struct virtnet_info *vi = netdev_priv(dev);
  
-	ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);

-   ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
-   ring->rx_pending = ring->rx_max_pending;
-   ring->tx_pending = ring->tx_max_pending;
+   ring->rx_max_pending = virtqueue_get_vring_max_size(vi->rq[0].vq);
+   ring->tx_max_pending = virtqueue_get_vring_max_size(vi->sq[0].vq);
+   ring->rx_pending = virtqueue_get_vring_size(vi->rq[0].vq);
+   ring->tx_pending = virtqueue_get_vring_size(vi->sq[0].vq);
  }
  
  


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v3 11/15] drm/shmem-helper: Add generic memory shrinker

2022-04-12 Thread kernel test robot
Hi Dmitry,

I love your patch! Perhaps something to improve:

[auto build test WARNING on next-20220411]
[cannot apply to drm/drm-next v5.18-rc2 v5.18-rc1 v5.17 v5.18-rc2]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/intel-lab-lkp/linux/commits/Dmitry-Osipenko/Add-generic-memory-shrinker-to-VirtIO-GPU-and-Panfrost-DRM-drivers/20220412-060325
base:d12d7e1cfe38e0c36d28c7a9fbbc436ad0d17c14
config: i386-randconfig-a005-20220411 
(https://download.01.org/0day-ci/archive/20220412/202204121523.qvmxovzg-...@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project 
fe2478d44e4f7f191c43fef629ac7a23d0251e72)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/intel-lab-lkp/linux/commit/683ba8a9d72ba7770a61a9266a2b33949f3874f2
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review 
Dmitry-Osipenko/Add-generic-memory-shrinker-to-VirtIO-GPU-and-Panfrost-DRM-drivers/20220412-060325
git checkout 683ba8a9d72ba7770a61a9266a2b33949f3874f2
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 
O=build_dir ARCH=i386 SHELL=/bin/bash drivers/gpu/drm/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/drm_gem_shmem_helper.c:289:11: warning: variable 'new_state' 
>> is used uninitialized whenever 'if' condition is false 
>> [-Wsometimes-uninitialized]
   else if (shmem->madv < 0)
^~~
   drivers/gpu/drm/drm_gem_shmem_helper.c:292:46: note: uninitialized use 
occurs here
   drm_gem_shmem_set_pages_state_locked(shmem, new_state);
   ^
   drivers/gpu/drm/drm_gem_shmem_helper.c:289:7: note: remove the 'if' if its 
condition is always true
   else if (shmem->madv < 0)
^~~~
   drivers/gpu/drm/drm_gem_shmem_helper.c:278:2: note: variable 'new_state' is 
declared here
   enum drm_gem_shmem_pages_state new_state;
   ^
   1 warning generated.


vim +289 drivers/gpu/drm/drm_gem_shmem_helper.c

   273  
   274  static void drm_gem_shmem_update_pages_state_locked(struct 
drm_gem_shmem_object *shmem)
   275  {
   276  struct drm_gem_object *obj = &shmem->base;
   277  struct drm_gem_shmem_shrinker *gem_shrinker = 
obj->dev->shmem_shrinker;
   278  enum drm_gem_shmem_pages_state new_state;
   279  
   280  if (!gem_shrinker || obj->import_attach)
   281  return;
   282  
   283  mutex_lock(&gem_shrinker->lock);
   284  
   285  if (!shmem->madv)
   286  new_state = DRM_GEM_SHMEM_PAGES_STATE_ACTIVE;
   287  else if (shmem->madv > 0)
   288  new_state = DRM_GEM_SHMEM_PAGES_STATE_PURGEABLE;
 > 289  else if (shmem->madv < 0)
   290  new_state = DRM_GEM_SHMEM_PAGES_STATE_PURGED;
   291  
   292  drm_gem_shmem_set_pages_state_locked(shmem, new_state);
   293  
   294  mutex_unlock(&gem_shrinker->lock);
   295  }
   296  

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v3 11/15] drm/shmem-helper: Add generic memory shrinker

2022-04-12 Thread kernel test robot
Hi Dmitry,

I love your patch! Perhaps something to improve:

[auto build test WARNING on next-20220411]
[cannot apply to drm/drm-next v5.18-rc2 v5.18-rc1 v5.17 v5.18-rc2]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/intel-lab-lkp/linux/commits/Dmitry-Osipenko/Add-generic-memory-shrinker-to-VirtIO-GPU-and-Panfrost-DRM-drivers/20220412-060325
base:d12d7e1cfe38e0c36d28c7a9fbbc436ad0d17c14
config: hexagon-randconfig-r045-20220411 
(https://download.01.org/0day-ci/archive/20220412/202204121504.glr3fhqe-...@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project 
fe2478d44e4f7f191c43fef629ac7a23d0251e72)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/intel-lab-lkp/linux/commit/683ba8a9d72ba7770a61a9266a2b33949f3874f2
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review 
Dmitry-Osipenko/Add-generic-memory-shrinker-to-VirtIO-GPU-and-Panfrost-DRM-drivers/20220412-060325
git checkout 683ba8a9d72ba7770a61a9266a2b33949f3874f2
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 
O=build_dir ARCH=hexagon SHELL=/bin/bash drivers/gpu/drm/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/drm_gem_shmem_helper.c:289:11: warning: variable 'new_state' 
>> is used uninitialized whenever 'if' condition is false 
>> [-Wsometimes-uninitialized]
   else if (shmem->madv < 0)
^~~
   drivers/gpu/drm/drm_gem_shmem_helper.c:292:46: note: uninitialized use 
occurs here
   drm_gem_shmem_set_pages_state_locked(shmem, new_state);
   ^
   drivers/gpu/drm/drm_gem_shmem_helper.c:289:7: note: remove the 'if' if its 
condition is always true
   else if (shmem->madv < 0)
^~~~
   drivers/gpu/drm/drm_gem_shmem_helper.c:278:2: note: variable 'new_state' is 
declared here
   enum drm_gem_shmem_pages_state new_state;
   ^
   1 warning generated.


vim +289 drivers/gpu/drm/drm_gem_shmem_helper.c

   273  
   274  static void drm_gem_shmem_update_pages_state_locked(struct 
drm_gem_shmem_object *shmem)
   275  {
   276  struct drm_gem_object *obj = &shmem->base;
   277  struct drm_gem_shmem_shrinker *gem_shrinker = 
obj->dev->shmem_shrinker;
   278  enum drm_gem_shmem_pages_state new_state;
   279  
   280  if (!gem_shrinker || obj->import_attach)
   281  return;
   282  
   283  mutex_lock(&gem_shrinker->lock);
   284  
   285  if (!shmem->madv)
   286  new_state = DRM_GEM_SHMEM_PAGES_STATE_ACTIVE;
   287  else if (shmem->madv > 0)
   288  new_state = DRM_GEM_SHMEM_PAGES_STATE_PURGEABLE;
 > 289  else if (shmem->madv < 0)
   290  new_state = DRM_GEM_SHMEM_PAGES_STATE_PURGED;
   291  
   292  drm_gem_shmem_set_pages_state_locked(shmem, new_state);
   293  
   294  mutex_unlock(&gem_shrinker->lock);
   295  }
   296  

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH V2 4/5] virtio-pci: implement synchronize_vqs()

2022-04-12 Thread Halil Pasic
On Tue, 12 Apr 2022 10:24:35 +0800
Jason Wang  wrote:

> > Regarding the question "are we safe against notifications before
> > indicators have been registered" I think we really need to think about
> > something like Secure Execution. We don't have, and we are unlikely
> > to have in hardware virtio-ccw implementations, and for a malicious 
> > hypervisor
> > that has full access to the guest memory hardening makes no sense.  
> 
> Does s390 have something like memory encryption? (I guess yes). In the
> case of x86 VM encryption, the I/O buffers were now done via software
> IOTLB, that's why hardening of the virtio driver is needed to prevent
> the hypervisor to poke the swiotlb etc.

Yep! Secure Execution is a confidential computing solution which is much
like encrypted guest memory, except for one gets exceptions when trying
to access private memory instead of ending up with garbage  because of
the encryption. These improvements are IMHO relevant to us!

Regards,
Halil
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v4 0/8] Introduce akcipher service for virtio-crypto

2022-04-12 Thread Paolo Bonzini




In our plan, the feature is designed for HTTPS offloading case and
other applications which use kernel RSA/ecdsa by keyctl syscall.


Hi Zhenwei,

what is the % of time spent doing asymmetric key operations in your
benchmark?  I am not very familiar with crypto acceleration but my
understanding has always been that most time is spent doing either
hashing (for signing) or symmetric key operations (for encryption).

If I understand correctly, without support for acceleration these 
patches are more of a demonstration of virtio-crypto, or usable for 
testing purposes.


Would it be possible to extend virtio-crypto to use keys already in the
host keyctl, or in a PKCS#11 smartcard, so that virtio-crypto could also
provide the functionality of an HSM?  Or does the standard require that
the keys are provided by the guest?

Paolo
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: Re: [PATCH v4 0/8] Introduce akcipher service for virtio-crypto

2022-04-12 Thread zhenwei pi



On 4/12/22 17:47, Paolo Bonzini wrote:



In our plan, the feature is designed for HTTPS offloading case and
other applications which use kernel RSA/ecdsa by keyctl syscall.


Hi Zhenwei,

what is the % of time spent doing asymmetric key operations in your
benchmark?  I am not very familiar with crypto acceleration but my
understanding has always been that most time is spent doing either
hashing (for signing) or symmetric key operations (for encryption).

If I understand correctly, without support for acceleration these 
patches are more of a demonstration of virtio-crypto, or usable for 
testing purposes.




Hi, Paolo

This is the perf result of nginx+openssl CPU calculation, the heavy load 
from openssl uses the most time(as same as you mentioned).
27.37%26.00%  nginxlibcrypto.so.1.1  [.] 
__bn_sqrx8x_reduction
20.58%19.52%  nginxlibcrypto.so.1.1  [.] 
mulx4x_internal
16.73%15.89%  nginxlibcrypto.so.1.1  [.] 
bn_sqrx8x_internal
 8.79% 0.00%  nginx[unknown] [k] 

 7.26% 0.00%  nginx[unknown] [.] 
0x89388669992a0cbc
 7.00% 0.00%  nginx[unknown] [k] 
0x45f0e480d5f2a58e
 6.76% 0.02%  nginx[kernel.kallsyms] [k] 
entry_SYSCALL_64_after_hwframe
 6.74% 0.02%  nginx[kernel.kallsyms] [k] 
do_syscall_64
 6.61% 0.00%  nginx[unknown] [.] 
0xa75a60d7820f9ffb
 6.47% 0.00%  nginx[unknown] [k] 
0xe91223f6da36254c
 5.51% 0.01%  nginx[kernel.kallsyms] [k] 
asm_common_interrupt
 5.46% 0.01%  nginx[kernel.kallsyms] [k] 
common_interrupt
 5.16% 0.04%  nginx[kernel.kallsyms] [k] 
__softirqentry_text_start
 4.92% 0.01%  nginx[kernel.kallsyms] [k] 
irq_exit_rcu
 4.91% 0.04%  nginx[kernel.kallsyms] [k] 
net_rx_action



This is the result of nginx+openssl keyctl offload(virtio crypto + host 
keyctl + Intel QAT):
30.38% 0.08%  nginx[kernel.kallsyms] [k] 
entry_SYSCALL_64_after_hwframe
30.29% 0.07%  nginx[kernel.kallsyms] [k] 
do_syscall_64
23.84% 0.00%  nginx[unknown] [k] 

14.24% 0.03%  nginx[kernel.kallsyms] [k] 
asm_common_interrupt
14.06% 0.05%  nginx[kernel.kallsyms] [k] 
common_interrupt
12.99% 0.11%  nginx[kernel.kallsyms] [k] 
__softirqentry_text_start
12.27% 0.12%  nginx[kernel.kallsyms] [k] 
net_rx_action

12.13% 0.03%  nginx[kernel.kallsyms] [k] __napi_poll
12.06% 0.06%  nginx[kernel.kallsyms] [k] 
irq_exit_rcu
10.49% 0.14%  nginxlibssl.so.1.1 [.] 
tls_process_client_key_exchange
10.21% 0.12%  nginx[virtio_net]  [k] 
virtnet_poll

10.13% 0.04%  nginxlibc-2.28.so  [.] syscall
10.12% 0.03%  nginxkctl-engine.so[.] 
kctl_rsa_priv_dec
10.02% 0.02%  nginxkctl-engine.so[.] 
kctl_hw_rsa_priv_func
 9.98% 0.01%  nginxlibkeyutils.so.1.10   [.] 
keyctl_pkey_decrypt

 9.95% 0.02%  nginxlibkeyutils.so.1.10   [.] keyctl
 9.77% 0.03%  nginx[kernel.kallsyms] [k] 
keyctl_pkey_e_d_s
 8.97% 0.00%  nginx[unknown] [k] 
0x7f4adbb81f0b
 8.78% 0.08%  nginxlibpthread-2.28.so[.] 
__libc_write
 8.49% 0.05%  nginx[kernel.kallsyms] [k] 
netif_receive_skb_list_internal


The RSA part gets reduced, and the QPS of https improves to ~200%.

Something may be ignored in this cover letter:
[4] Currently RSA is supported only in builtin driver. This driver is 
supposed to test the full feature without other software(Ex vhost 
process) and hardware dependence.

-> Yes, this patch is a demonstration of virtio-crypto.

[5] keyctl backend is in development, we will post this feature in 
Q2-2022. keyctl backend can use hardware acceleration(Ex, Intel QAT).

-> This is our plan. Currently it's still in developing.



Would it be possible to extend virtio-crypto to use keys already in the
host keyctl, or in a PKCS#11 smartcard, so that virtio-crypto could also
provide the functionality of an HSM?  Or does the standard require that
the keys are provided by the guest?

Paolo


I'm very interested in this, I'll try in Q3-2022 or later.

--
zhenwei pi
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v10 0/5] Use pageblock_order for cma and alloc_contig_range alignment.

2022-04-12 Thread David Hildenbrand
On 06.04.22 17:18, Zi Yan wrote:
> From: Zi Yan 
> 
> Hi David,

Hi!

> 
> This patchset tries to remove the MAX_ORDER-1 alignment requirement for CMA
> and alloc_contig_range(). It prepares for my upcoming changes to make
> MAX_ORDER adjustable at boot time[1]. It is on top of mmotm-2022-04-05-15-54.

Sorry for the late reply, I've got way too many irons in the fire right now.

> 
> I also added "Content-Type: text/plain; charset=UTF-8" to all email bodies
> explicitly, please let me know if you still cannot see the emails in a
> proper format.

Oh, thanks! But no need to work around Mimecast mailing issues on your
side. This just has to be fixed for good on the RH side ...


I yet heave to give #3 a thorough review, sorry for not commenting on
that earlier. It's a bit more involved, especially, with all the
possible corner cases :)

-- 
Thanks,

David / dhildenb

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v10 2/5] mm: page_isolation: check specified range for unmovable pages

2022-04-12 Thread David Hildenbrand
On 06.04.22 17:18, Zi Yan wrote:
> From: Zi Yan 
> 
> Enable set_migratetype_isolate() to check specified sub-range for
> unmovable pages during isolation. Page isolation is done
> at MAX_ORDER_NR_PAEGS granularity, but not all pages within that
> granularity are intended to be isolated. For example,
> alloc_contig_range(), which uses page isolation, allows ranges without
> alignment. This commit makes unmovable page check only look for
> interesting pages, so that page isolation can succeed for any
> non-overlapping ranges.
> 
> Signed-off-by: Zi Yan 
> ---

[...]

>  /*
> - * This function checks whether pageblock includes unmovable pages or not.
> + * This function checks whether the range [start_pfn, end_pfn) includes
> + * unmovable pages or not. The range must fall into a single pageblock and
> + * consequently belong to a single zone.
>   *
>   * PageLRU check without isolation or lru_lock could race so that
>   * MIGRATE_MOVABLE block might include unmovable pages. And __PageMovable
> @@ -28,12 +30,14 @@
>   * cannot get removed (e.g., via memory unplug) concurrently.
>   *
>   */
> -static struct page *has_unmovable_pages(struct zone *zone, struct page *page,
> -  int migratetype, int flags)
> +static struct page *has_unmovable_pages(unsigned long start_pfn, unsigned 
> long end_pfn,
> + int migratetype, int flags)
>  {
> - unsigned long iter = 0;
> - unsigned long pfn = page_to_pfn(page);
> - unsigned long offset = pfn % pageblock_nr_pages;
> + unsigned long pfn = start_pfn;
> + struct page *page = pfn_to_page(pfn);


Just do

struct page *page = pfn_to_page(start_pfn);
struct zone *zone = page_zone(page);

here. No need to lookup the zone again in the loop because, as you
document "must ... belong to a single zone.".

Then, there is also no need to initialize "pfn" here. In the loop header
is sufficient.

> +
> + VM_BUG_ON(ALIGN_DOWN(start_pfn, pageblock_nr_pages) !=
> +   ALIGN_DOWN(end_pfn - 1, pageblock_nr_pages));
>  
>   if (is_migrate_cma_page(page)) {
>   /*
> @@ -47,8 +51,11 @@ static struct page *has_unmovable_pages(struct zone *zone, 
> struct page *page,
>   return page;
>   }
>  
> - for (; iter < pageblock_nr_pages - offset; iter++) {
> - page = pfn_to_page(pfn + iter);
> + for (pfn = start_pfn; pfn < end_pfn; pfn++) {
> + struct zone *zone;
> +
> + page = pfn_to_page(pfn);
> + zone = page_zone(page);
>  
>   /*
>* Both, bootmem allocations and memory holes are marked
> @@ -85,7 +92,7 @@ static struct page *has_unmovable_pages(struct zone *zone, 
> struct page *page,
>   }
>  
>   skip_pages = compound_nr(head) - (page - head);
> - iter += skip_pages - 1;
> + pfn += skip_pages - 1;
>   continue;
>   }
>  
> @@ -97,7 +104,7 @@ static struct page *has_unmovable_pages(struct zone *zone, 
> struct page *page,
>*/
>   if (!page_ref_count(page)) {
>   if (PageBuddy(page))
> - iter += (1 << buddy_order(page)) - 1;
> + pfn += (1 << buddy_order(page)) - 1;
>   continue;
>   }
>  
> @@ -134,11 +141,18 @@ static struct page *has_unmovable_pages(struct zone 
> *zone, struct page *page,
>   return NULL;
>  }
>  
> -static int set_migratetype_isolate(struct page *page, int migratetype, int 
> isol_flags)
> +/*
> + * This function set pageblock migratetype to isolate if no unmovable page is
> + * present in [start_pfn, end_pfn). The pageblock must intersect with
> + * [start_pfn, end_pfn).
> + */
> +static int set_migratetype_isolate(struct page *page, int migratetype, int 
> isol_flags,
> + unsigned long start_pfn, unsigned long end_pfn)

I think we might be able do better, eventually not passing start_pfn at
all. Hmm.

I think we want to pull out the
start_isolate_page_range()/undo_isolate_page_range() interface change
into a separate patch.

Let me try to give it a shot, I'll try hacking something up real quick
to see if we can do better.

-- 
Thanks,

David / dhildenb

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v10 2/5] mm: page_isolation: check specified range for unmovable pages

2022-04-12 Thread David Hildenbrand
On 12.04.22 16:07, Zi Yan wrote:
> On 12 Apr 2022, at 9:10, David Hildenbrand wrote:
> 
>> On 06.04.22 17:18, Zi Yan wrote:
>>> From: Zi Yan 
>>>
>>> Enable set_migratetype_isolate() to check specified sub-range for
>>> unmovable pages during isolation. Page isolation is done
>>> at MAX_ORDER_NR_PAEGS granularity, but not all pages within that
>>> granularity are intended to be isolated. For example,
>>> alloc_contig_range(), which uses page isolation, allows ranges without
>>> alignment. This commit makes unmovable page check only look for
>>> interesting pages, so that page isolation can succeed for any
>>> non-overlapping ranges.
>>>
>>> Signed-off-by: Zi Yan 
>>> ---
>>
>> [...]
>>
>>>  /*
>>> - * This function checks whether pageblock includes unmovable pages or not.
>>> + * This function checks whether the range [start_pfn, end_pfn) includes
>>> + * unmovable pages or not. The range must fall into a single pageblock and
>>> + * consequently belong to a single zone.
>>>   *
>>>   * PageLRU check without isolation or lru_lock could race so that
>>>   * MIGRATE_MOVABLE block might include unmovable pages. And __PageMovable
>>> @@ -28,12 +30,14 @@
>>>   * cannot get removed (e.g., via memory unplug) concurrently.
>>>   *
>>>   */
>>> -static struct page *has_unmovable_pages(struct zone *zone, struct page 
>>> *page,
>>> -int migratetype, int flags)
>>> +static struct page *has_unmovable_pages(unsigned long start_pfn, unsigned 
>>> long end_pfn,
>>> +   int migratetype, int flags)
>>>  {
>>> -   unsigned long iter = 0;
>>> -   unsigned long pfn = page_to_pfn(page);
>>> -   unsigned long offset = pfn % pageblock_nr_pages;
>>> +   unsigned long pfn = start_pfn;
>>> +   struct page *page = pfn_to_page(pfn);
>>
>>
>> Just do
>>
>> struct page *page = pfn_to_page(start_pfn);
>> struct zone *zone = page_zone(page);
>>
>> here. No need to lookup the zone again in the loop because, as you
>> document "must ... belong to a single zone.".
>>
>> Then, there is also no need to initialize "pfn" here. In the loop header
>> is sufficient.
>>
> 
> Sure.
> 
>>> +
>>> +   VM_BUG_ON(ALIGN_DOWN(start_pfn, pageblock_nr_pages) !=
>>> + ALIGN_DOWN(end_pfn - 1, pageblock_nr_pages));
>>>
>>> if (is_migrate_cma_page(page)) {
>>> /*
>>> @@ -47,8 +51,11 @@ static struct page *has_unmovable_pages(struct zone 
>>> *zone, struct page *page,
>>> return page;
>>> }
>>>
>>> -   for (; iter < pageblock_nr_pages - offset; iter++) {
>>> -   page = pfn_to_page(pfn + iter);
>>> +   for (pfn = start_pfn; pfn < end_pfn; pfn++) {
>>> +   struct zone *zone;
>>> +
>>> +   page = pfn_to_page(pfn);
>>> +   zone = page_zone(page);
>>>
>>> /*
>>>  * Both, bootmem allocations and memory holes are marked
>>> @@ -85,7 +92,7 @@ static struct page *has_unmovable_pages(struct zone 
>>> *zone, struct page *page,
>>> }
>>>
>>> skip_pages = compound_nr(head) - (page - head);
>>> -   iter += skip_pages - 1;
>>> +   pfn += skip_pages - 1;
>>> continue;
>>> }
>>>
>>> @@ -97,7 +104,7 @@ static struct page *has_unmovable_pages(struct zone 
>>> *zone, struct page *page,
>>>  */
>>> if (!page_ref_count(page)) {
>>> if (PageBuddy(page))
>>> -   iter += (1 << buddy_order(page)) - 1;
>>> +   pfn += (1 << buddy_order(page)) - 1;
>>> continue;
>>> }
>>>
>>> @@ -134,11 +141,18 @@ static struct page *has_unmovable_pages(struct zone 
>>> *zone, struct page *page,
>>> return NULL;
>>>  }
>>>
>>> -static int set_migratetype_isolate(struct page *page, int migratetype, int 
>>> isol_flags)
>>> +/*
>>> + * This function set pageblock migratetype to isolate if no unmovable page 
>>> is
>>> + * present in [start_pfn, end_pfn). The pageblock must intersect with
>>> + * [start_pfn, end_pfn).
>>> + */
>>> +static int set_migratetype_isolate(struct page *page, int migratetype, int 
>>> isol_flags,
>>> +   unsigned long start_pfn, unsigned long end_pfn)
>>
>> I think we might be able do better, eventually not passing start_pfn at
>> all. Hmm.
> 
> IMHO, having start_pfn and end_pfn in the parameter list would make the
> interface easier to understand. Otherwise if we remove start_pfn,
> the caller needs to adjust @page to be within the range of [start_pfn,
> end_pfn)
> 
>>
>> I think we want to pull out the
>> start_isolate_page_range()/undo_isolate_page_range() interface change
>> into a separate patch.
> 
> You mean a patch just adding
> 
> unsigned long isolate_start = pfn_max_align_down(start_pfn);
> unsigned long isolate_end = pfn_max_align_up(end_pfn);
> 
> in start_isolate_page_range()/undo_isolate_page_range()?
> 
> Yes I can do that.

I think we have to be careful with memory onlining/o

Re: [PATCH v10 2/5] mm: page_isolation: check specified range for unmovable pages

2022-04-12 Thread David Hildenbrand
On 12.04.22 17:01, Zi Yan wrote:
> On 12 Apr 2022, at 10:49, David Hildenbrand wrote:
> 
>> On 12.04.22 16:07, Zi Yan wrote:
>>> On 12 Apr 2022, at 9:10, David Hildenbrand wrote:
>>>
 On 06.04.22 17:18, Zi Yan wrote:
> From: Zi Yan 
>
> Enable set_migratetype_isolate() to check specified sub-range for
> unmovable pages during isolation. Page isolation is done
> at MAX_ORDER_NR_PAEGS granularity, but not all pages within that
> granularity are intended to be isolated. For example,
> alloc_contig_range(), which uses page isolation, allows ranges without
> alignment. This commit makes unmovable page check only look for
> interesting pages, so that page isolation can succeed for any
> non-overlapping ranges.
>
> Signed-off-by: Zi Yan 
> ---

 [...]

>  /*
> - * This function checks whether pageblock includes unmovable pages or 
> not.
> + * This function checks whether the range [start_pfn, end_pfn) includes
> + * unmovable pages or not. The range must fall into a single pageblock 
> and
> + * consequently belong to a single zone.
>   *
>   * PageLRU check without isolation or lru_lock could race so that
>   * MIGRATE_MOVABLE block might include unmovable pages. And __PageMovable
> @@ -28,12 +30,14 @@
>   * cannot get removed (e.g., via memory unplug) concurrently.
>   *
>   */
> -static struct page *has_unmovable_pages(struct zone *zone, struct page 
> *page,
> -  int migratetype, int flags)
> +static struct page *has_unmovable_pages(unsigned long start_pfn, 
> unsigned long end_pfn,
> + int migratetype, int flags)
>  {
> - unsigned long iter = 0;
> - unsigned long pfn = page_to_pfn(page);
> - unsigned long offset = pfn % pageblock_nr_pages;
> + unsigned long pfn = start_pfn;
> + struct page *page = pfn_to_page(pfn);


 Just do

 struct page *page = pfn_to_page(start_pfn);
 struct zone *zone = page_zone(page);

 here. No need to lookup the zone again in the loop because, as you
 document "must ... belong to a single zone.".

 Then, there is also no need to initialize "pfn" here. In the loop header
 is sufficient.

>>>
>>> Sure.
>>>
> +
> + VM_BUG_ON(ALIGN_DOWN(start_pfn, pageblock_nr_pages) !=
> +   ALIGN_DOWN(end_pfn - 1, pageblock_nr_pages));
>
>   if (is_migrate_cma_page(page)) {
>   /*
> @@ -47,8 +51,11 @@ static struct page *has_unmovable_pages(struct zone 
> *zone, struct page *page,
>   return page;
>   }
>
> - for (; iter < pageblock_nr_pages - offset; iter++) {
> - page = pfn_to_page(pfn + iter);
> + for (pfn = start_pfn; pfn < end_pfn; pfn++) {
> + struct zone *zone;
> +
> + page = pfn_to_page(pfn);
> + zone = page_zone(page);
>
>   /*
>* Both, bootmem allocations and memory holes are marked
> @@ -85,7 +92,7 @@ static struct page *has_unmovable_pages(struct zone 
> *zone, struct page *page,
>   }
>
>   skip_pages = compound_nr(head) - (page - head);
> - iter += skip_pages - 1;
> + pfn += skip_pages - 1;
>   continue;
>   }
>
> @@ -97,7 +104,7 @@ static struct page *has_unmovable_pages(struct zone 
> *zone, struct page *page,
>*/
>   if (!page_ref_count(page)) {
>   if (PageBuddy(page))
> - iter += (1 << buddy_order(page)) - 1;
> + pfn += (1 << buddy_order(page)) - 1;
>   continue;
>   }
>
> @@ -134,11 +141,18 @@ static struct page *has_unmovable_pages(struct zone 
> *zone, struct page *page,
>   return NULL;
>  }
>
> -static int set_migratetype_isolate(struct page *page, int migratetype, 
> int isol_flags)
> +/*
> + * This function set pageblock migratetype to isolate if no unmovable 
> page is
> + * present in [start_pfn, end_pfn). The pageblock must intersect with
> + * [start_pfn, end_pfn).
> + */
> +static int set_migratetype_isolate(struct page *page, int migratetype, 
> int isol_flags,
> + unsigned long start_pfn, unsigned long end_pfn)

 I think we might be able do better, eventually not passing start_pfn at
 all. Hmm.
>>>
>>> IMHO, having start_pfn and end_pfn in the parameter list would make the
>>> interface easier to understand. Otherwise if we remove start_pfn,
>>> the caller needs to adjust @page to be within the range of [start_pfn,
>>> end_pfn)
>>>

 I think we want to pull out the
 start_isolate_page_range()/undo_isolate_page_range() interface change
 into a separate patch.
>>>
>>> Y

Re: [PATCH v2 1/2] vdpa: Add support for querying vendor statistics

2022-04-12 Thread kernel test robot
Hi Eli,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v5.18-rc2 next-20220412]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/intel-lab-lkp/linux/commits/Eli-Cohen/Show-statistics-for-a-vdpa-device/20220412-212129
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
ce522ba9ef7e2d9fb22a39eb3371c0c64e2a433e
config: i386-randconfig-a004-20220411 
(https://download.01.org/0day-ci/archive/20220413/202204130003.froyiuxa-...@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project 
fe2478d44e4f7f191c43fef629ac7a23d0251e72)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/intel-lab-lkp/linux/commit/813a1bf827634a8d7e148b133e2849fac37819cd
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review 
Eli-Cohen/Show-statistics-for-a-vdpa-device/20220412-212129
git checkout 813a1bf827634a8d7e148b133e2849fac37819cd
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 
O=build_dir ARCH=i386 SHELL=/bin/bash drivers/usb/typec/ drivers/vdpa/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

>> drivers/vdpa/vdpa.c:1122:61: warning: implicit conversion from 'int' to 
>> 's16' (aka 'short') changes value from 65535 to -1 [-Wconstant-conversion]
   [VDPA_ATTR_DEV_QUEUE_INDEX] = NLA_POLICY_RANGE(NLA_U32, 0, 65535),
 ~^~
   include/net/netlink.h:396:9: note: expanded from macro 'NLA_POLICY_RANGE'
   .max = _max \
  ^~~~
   1 warning generated.


vim +1122 drivers/vdpa/vdpa.c

  1114  
  1115  static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
  1116  [VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING },
  1117  [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
  1118  [VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING },
  1119  [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
  1120  /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
  1121  [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
> 1122  [VDPA_ATTR_DEV_QUEUE_INDEX] = NLA_POLICY_RANGE(NLA_U32, 0, 
> 65535),
  1123  };
  1124  

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH V2 4/5] virtio-pci: implement synchronize_vqs()

2022-04-12 Thread Cornelia Huck
On Tue, Apr 12 2022, Halil Pasic  wrote:

> On Mon, 11 Apr 2022 16:27:41 +0200
> Cornelia Huck  wrote:

>> My main concern is that we would need to synchronize against a single
>> interrupt that covers all kinds of I/O interrupts, not just a single
>> device...
>> 
>
> Could we synchronize on struct airq_info's lock member? If we were
> to grab all of these that might be involved...

Hm, that could possibly narrow the sync down to a subset, which seems
better. For devices still using classic interrupts, per-device sync
would be easy.

>
> AFAIU for the synchronize implementation we need a lock or a set of locks
> that contain all the possible vring_interrupt() calls with the queuues
> that belong to the given device as a critical section. That way, one
> has the acquire's and release's in place so that the vrign_interrupt()
> either guaranteed to finish before the change of driver_ready is
> guaranteed to be complete, or it is guaranteed to see the change.
>
> In any case, I guess we should first get clear on the first part. I.e.
> when do we want to allow host->guest notifications.

Also, whether we just care about vring interrupts, or general device
interrupts (not sure if a config change interrupt may also trigger
things we do not want to trigger?)

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v9 01/32] virtio: add helper virtqueue_get_vring_max_size()

2022-04-12 Thread Xuan Zhuo
On Tue, 12 Apr 2022 10:41:03 +0800, Jason Wang  wrote:
>
> 在 2022/4/6 上午11:43, Xuan Zhuo 写道:
> > Record the maximum queue num supported by the device.
> >
> > virtio-net can display the maximum (supported by hardware) ring size in
> > ethtool -g eth0.
> >
> > When the subsequent patch implements vring reset, it can judge whether
> > the ring size passed by the driver is legal based on this.
> >
> > Signed-off-by: Xuan Zhuo 
> > ---
> >   arch/um/drivers/virtio_uml.c |  1 +
> >   drivers/platform/mellanox/mlxbf-tmfifo.c |  2 ++
> >   drivers/remoteproc/remoteproc_virtio.c   |  2 ++
> >   drivers/s390/virtio/virtio_ccw.c |  3 +++
> >   drivers/virtio/virtio_mmio.c |  2 ++
> >   drivers/virtio/virtio_pci_legacy.c   |  2 ++
> >   drivers/virtio/virtio_pci_modern.c   |  2 ++
> >   drivers/virtio/virtio_ring.c | 14 ++
> >   drivers/virtio/virtio_vdpa.c |  2 ++
> >   include/linux/virtio.h   |  2 ++
> >   10 files changed, 32 insertions(+)
> >
> > diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
> > index ba562d68dc04..904993d15a85 100644
> > --- a/arch/um/drivers/virtio_uml.c
> > +++ b/arch/um/drivers/virtio_uml.c
> > @@ -945,6 +945,7 @@ static struct virtqueue *vu_setup_vq(struct 
> > virtio_device *vdev,
> > goto error_create;
> > }
> > vq->priv = info;
> > +   vq->num_max = num;
> > num = virtqueue_get_vring_size(vq);
> >
> > if (vu_dev->protocol_features &
> > diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c 
> > b/drivers/platform/mellanox/mlxbf-tmfifo.c
> > index 38800e86ed8a..1ae3c56b66b0 100644
> > --- a/drivers/platform/mellanox/mlxbf-tmfifo.c
> > +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
> > @@ -959,6 +959,8 @@ static int mlxbf_tmfifo_virtio_find_vqs(struct 
> > virtio_device *vdev,
> > goto error;
> > }
> >
> > +   vq->num_max = vring->num;
> > +
> > vqs[i] = vq;
> > vring->vq = vq;
> > vq->priv = vring;
> > diff --git a/drivers/remoteproc/remoteproc_virtio.c 
> > b/drivers/remoteproc/remoteproc_virtio.c
> > index 70ab496d0431..7611755d0ae2 100644
> > --- a/drivers/remoteproc/remoteproc_virtio.c
> > +++ b/drivers/remoteproc/remoteproc_virtio.c
> > @@ -125,6 +125,8 @@ static struct virtqueue *rp_find_vq(struct 
> > virtio_device *vdev,
> > return ERR_PTR(-ENOMEM);
> > }
> >
> > +   vq->num_max = len;
>
>
> I wonder if this is correct.
>
> It looks to me len is counted in bytes:
>
> /**
>   * struct rproc_vring - remoteproc vring state
>   * @va: virtual address
>   * @len: length, in bytes
>   * @da: device address
>   * @align: vring alignment
>   * @notifyid: rproc-specific unique vring index
>   * @rvdev: remote vdev
>   * @vq: the virtqueue of this vring
>   */
> struct rproc_vring {
>      void *va;
>      int len;
>      u32 da;
>      u32 align;
>      int notifyid;
>      struct rproc_vdev *rvdev;
>      struct virtqueue *vq;
> };
>

I think this comment is incorrect because here len is passed as num to
vring_new_virtqueue().

There is also this usage:

/* actual size of vring (in bytes) */
size = PAGE_ALIGN(vring_size(rvring->len, rvring->align));


And this value comes from here:

static int
rproc_parse_vring(struct rproc_vdev *rvdev, struct fw_rsc_vdev *rsc, 
int i)
{
struct rproc *rproc = rvdev->rproc;
struct device *dev = &rproc->dev;
struct fw_rsc_vdev_vring *vring = &rsc->vring[i];
struct rproc_vring *rvring = &rvdev->vring[i];

dev_dbg(dev, "vdev rsc: vring%d: da 0x%x, qsz %d, align %d\n",
i, vring->da, vring->num, vring->align);

/* verify queue size and vring alignment are sane */
if (!vring->num || !vring->align) {
dev_err(dev, "invalid qsz (%d) or alignment (%d)\n",
vring->num, vring->align);
return -EINVAL;
}

   >rvring->len = vring->num;
rvring->align = vring->align;
rvring->rvdev = rvdev;

return 0;
}

/**
 * struct fw_rsc_vdev_vring - vring descriptor entry
 * @da: device address
 * @align: the alignment between the consumer and producer parts of the vring
 * @num: num of buffers supported by this vring (must be power of two)
 * @notifyid: a unique rproc-wide notify index for this vring. This notify
 * index is used when kicking a remote processor, to let it know that this
 * vring is triggered.
 * @pa: physical address
 *
 * This descriptor is not a resource entry by itself; it is part of the
 * vdev resource type (see below).
 *
 * Note that @da should either contain the device address where
 * the remote processor is expecting the vring, or indicate that
 * dynamically all

Re: [PATCH V2 4/5] virtio-pci: implement synchronize_vqs()

2022-04-12 Thread Jason Wang
On Wed, Apr 13, 2022 at 12:49 AM Cornelia Huck  wrote:
>
> On Tue, Apr 12 2022, Halil Pasic  wrote:
>
> > On Mon, 11 Apr 2022 16:27:41 +0200
> > Cornelia Huck  wrote:
>
> >> My main concern is that we would need to synchronize against a single
> >> interrupt that covers all kinds of I/O interrupts, not just a single
> >> device...
> >>
> >
> > Could we synchronize on struct airq_info's lock member? If we were
> > to grab all of these that might be involved...
>
> Hm, that could possibly narrow the sync down to a subset, which seems
> better. For devices still using classic interrupts, per-device sync
> would be easy.
>
> >
> > AFAIU for the synchronize implementation we need a lock or a set of locks
> > that contain all the possible vring_interrupt() calls with the queuues
> > that belong to the given device as a critical section. That way, one
> > has the acquire's and release's in place so that the vrign_interrupt()
> > either guaranteed to finish before the change of driver_ready is
> > guaranteed to be complete, or it is guaranteed to see the change.
> >
> > In any case, I guess we should first get clear on the first part. I.e.
> > when do we want to allow host->guest notifications.
>
> Also, whether we just care about vring interrupts, or general device
> interrupts (not sure if a config change interrupt may also trigger
> things we do not want to trigger?)

I think only vring interrupts, since the config interrupt hardening is
done via 22b7050a024d7 ("virtio: defer config changed notifications")

Thanks

>

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] fuse: avoid unnecessary spinlock bump

2022-04-12 Thread JeffleXu



On 4/11/22 10:00 PM, Vivek Goyal wrote:
> On Mon, Apr 11, 2022 at 03:20:05PM +0200, Bernd Schubert wrote:
> 
> So for testing DAX, I have to rely on out of tree patches from qemu
> here if any changes in virtiofs client happen.
> 
> https://gitlab.com/virtio-fs/qemu/-/tree/virtio-fs-dev
> 
> Jeffle is probably relying on their own virtiofsd implementation for DAX
> testing.
> 

Actually I also use the C version virtiofsd in the above described
repository for testing :)

-- 
Thanks,
Jeffle
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v9 23/32] virtio_pci: queue_reset: support VIRTIO_F_RING_RESET

2022-04-12 Thread Xuan Zhuo
On Tue, 12 Apr 2022 15:07:58 +0800, Jason Wang  wrote:
>
> 在 2022/4/6 上午11:43, Xuan Zhuo 写道:
> > This patch implements virtio pci support for QUEUE RESET.
> >
> > Performing reset on a queue is divided into these steps:
> >
> >   1. notify the device to reset the queue
> >   2. recycle the buffer submitted
> >   3. reset the vring (may re-alloc)
> >   4. mmap vring to device, and enable the queue
> >
> > This patch implements virtio_reset_vq(), virtio_enable_resetq() in the
> > pci scenario.
> >
> > Signed-off-by: Xuan Zhuo 
> > ---
> >   drivers/virtio/virtio_pci_common.c |  8 +--
> >   drivers/virtio/virtio_pci_modern.c | 84 ++
> >   drivers/virtio/virtio_ring.c   |  2 +
> >   include/linux/virtio.h |  1 +
> >   4 files changed, 92 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_pci_common.c 
> > b/drivers/virtio/virtio_pci_common.c
> > index fdbde1db5ec5..863d3a8a0956 100644
> > --- a/drivers/virtio/virtio_pci_common.c
> > +++ b/drivers/virtio/virtio_pci_common.c
> > @@ -248,9 +248,11 @@ static void vp_del_vq(struct virtqueue *vq)
> > struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
> > unsigned long flags;
> >
> > -   spin_lock_irqsave(&vp_dev->lock, flags);
> > -   list_del(&info->node);
> > -   spin_unlock_irqrestore(&vp_dev->lock, flags);
> > +   if (!vq->reset) {
>
>
> On which condition that we may hit this path?
>
>
> > +   spin_lock_irqsave(&vp_dev->lock, flags);
> > +   list_del(&info->node);
> > +   spin_unlock_irqrestore(&vp_dev->lock, flags);
> > +   }
> >
> > vp_dev->del_vq(info);
> > kfree(info);
> > diff --git a/drivers/virtio/virtio_pci_modern.c 
> > b/drivers/virtio/virtio_pci_modern.c
> > index 49a4493732cf..cb5d38f1c9c8 100644
> > --- a/drivers/virtio/virtio_pci_modern.c
> > +++ b/drivers/virtio/virtio_pci_modern.c
> > @@ -34,6 +34,9 @@ static void vp_transport_features(struct virtio_device 
> > *vdev, u64 features)
> > if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) &&
> > pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV))
> > __virtio_set_bit(vdev, VIRTIO_F_SR_IOV);
> > +
> > +   if (features & BIT_ULL(VIRTIO_F_RING_RESET))
> > +   __virtio_set_bit(vdev, VIRTIO_F_RING_RESET);
> >   }
> >
> >   /* virtio config->finalize_features() implementation */
> > @@ -199,6 +202,83 @@ static int vp_active_vq(struct virtqueue *vq, u16 
> > msix_vec)
> > return 0;
> >   }
> >
> > +static int vp_modern_reset_vq(struct virtqueue *vq)
> > +{
> > +   struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
> > +   struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
> > +   struct virtio_pci_vq_info *info;
> > +   unsigned long flags;
> > +
> > +   if (!virtio_has_feature(vq->vdev, VIRTIO_F_RING_RESET))
> > +   return -ENOENT;
> > +
> > +   vp_modern_set_queue_reset(mdev, vq->index);
> > +
> > +   info = vp_dev->vqs[vq->index];
> > +
> > +   /* delete vq from irq handler */
> > +   spin_lock_irqsave(&vp_dev->lock, flags);
> > +   list_del(&info->node);
> > +   spin_unlock_irqrestore(&vp_dev->lock, flags);
> > +
> > +   INIT_LIST_HEAD(&info->node);
> > +
> > +   /* For the case where vq has an exclusive irq, to prevent the irq from
> > +* being received again and the pending irq, call disable_irq().
> > +*
> > +* In the scenario based on shared interrupts, vq will be searched from
> > +* the queue virtqueues. Since the previous list_del() has been deleted
> > +* from the queue, it is impossible for vq to be called in this case.
> > +* There is no need to close the corresponding interrupt.
> > +*/
> > +   if (vp_dev->per_vq_vectors && info->msix_vector != VIRTIO_MSI_NO_VECTOR)
> > +   disable_irq(pci_irq_vector(vp_dev->pci_dev, info->msix_vector));
>
>
> See the previous discussion and the revert of the first try to harden
> the interrupt. We probably can't use disable_irq() since it conflicts
> with the affinity managed IRQ that is used by some drivers.
>
> We need to use synchonize_irq() and per virtqueue flag instead. As
> mentioned in previous patches, this could be done on top of my rework on
> the IRQ hardening .

OK, the next version will contain hardened features by per virtqueue flag.

Thanks.

>
>
> > +
> > +   vq->reset = true;
> > +
> > +   return 0;
> > +}
> > +
> > +static int vp_modern_enable_reset_vq(struct virtqueue *vq)
> > +{
> > +   struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
> > +   struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
> > +   struct virtio_pci_vq_info *info;
> > +   unsigned long flags, index;
> > +   int err;
> > +
> > +   if (!vq->reset)
> > +   return -EBUSY;
> > +
> > +   index = vq->index;
> > +   info = vp_dev->vqs[index];
> > +
> > +   /* check queue reset status */
> > +   if (vp_modern_get_queue_reset(mdev, index) != 1)
> > +   return -EBUSY;
> > +
> > +   err = vp_active_vq(vq, info->msix_vector);
> > +   if (err)

Re: [PATCH v9 12/32] virtio_ring: packed: extract the logic of alloc queue

2022-04-12 Thread Xuan Zhuo
On Tue, 12 Apr 2022 14:28:24 +0800, Jason Wang  wrote:
>
> 在 2022/4/6 上午11:43, Xuan Zhuo 写道:
> > Separate the logic of packed to create vring queue.
> >
> > For the convenience of passing parameters, add a structure
> > vring_packed.
> >
> > This feature is required for subsequent virtuqueue reset vring.
> >
> > Signed-off-by: Xuan Zhuo 
> > ---
> >   drivers/virtio/virtio_ring.c | 70 
> >   1 file changed, 56 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 33864134a744..ea451ae2aaef 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -1817,19 +1817,17 @@ static struct vring_desc_extra 
> > *vring_alloc_desc_extra(unsigned int num)
> > return desc_extra;
> >   }
> >
> > -static struct virtqueue *vring_create_virtqueue_packed(
> > -   unsigned int index,
> > -   unsigned int num,
> > -   unsigned int vring_align,
> > -   struct virtio_device *vdev,
> > -   bool weak_barriers,
> > -   bool may_reduce_num,
> > -   bool context,
> > -   bool (*notify)(struct virtqueue *),
> > -   void (*callback)(struct virtqueue *),
> > -   const char *name)
> > +static int vring_alloc_queue_packed(struct virtio_device *vdev,
> > +   u32 num,
> > +   struct vring_packed_desc **_ring,
> > +   struct vring_packed_desc_event **_driver,
> > +   struct vring_packed_desc_event **_device,
> > +   dma_addr_t *_ring_dma_addr,
> > +   dma_addr_t *_driver_event_dma_addr,
> > +   dma_addr_t *_device_event_dma_addr,
> > +   size_t *_ring_size_in_bytes,
> > +   size_t *_event_size_in_bytes)
> >   {
> > -   struct vring_virtqueue *vq;
> > struct vring_packed_desc *ring;
> > struct vring_packed_desc_event *driver, *device;
> > dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
> > @@ -1857,6 +1855,52 @@ static struct virtqueue 
> > *vring_create_virtqueue_packed(
> > if (!device)
> > goto err_device;
> >
> > +   *_ring   = ring;
> > +   *_driver = driver;
> > +   *_device = device;
> > +   *_ring_dma_addr  = ring_dma_addr;
> > +   *_driver_event_dma_addr  = driver_event_dma_addr;
> > +   *_device_event_dma_addr  = device_event_dma_addr;
> > +   *_ring_size_in_bytes = ring_size_in_bytes;
> > +   *_event_size_in_bytes= event_size_in_bytes;
>
>
> I wonder if we can simply factor out split and packed from struct
> vring_virtqueue:
>
> struct vring_virtqueue {
>      union {
>          struct {} split;
>          struct {} packed;
>      };
> };
>
> to
>
> struct vring_virtqueue_split {};
> struct vring_virtqueue_packed {};
>
> Then we can do things like:
>
> vring_create_virtqueue_packed(struct virtio_device *vdev, u32 num,
> struct vring_virtqueue_packed *packed);
>
> and
>
> vring_vritqueue_attach_packed(struct vring_virtqueue *vq, struct
> vring_virtqueue_packed packed);

This idea is very similar to my previous idea, just without introducing a new
structure.

I'd be more than happy to revise this.

Thanks.


>
> Thanks
>
>
> > +
> > +   return 0;
> > +
> > +err_device:
> > +   vring_free_queue(vdev, event_size_in_bytes, driver, 
> > driver_event_dma_addr);
> > +
> > +err_driver:
> > +   vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr);
> > +
> > +err_ring:
> > +   return -ENOMEM;
> > +}
> > +
> > +static struct virtqueue *vring_create_virtqueue_packed(
> > +   unsigned int index,
> > +   unsigned int num,
> > +   unsigned int vring_align,
> > +   struct virtio_device *vdev,
> > +   bool weak_barriers,
> > +   bool may_reduce_num,
> > +   bool context,
> > +   bool (*notify)(struct virtqueue *),
> > +   void (*callback)(struct virtqueue *),
> > +   const char *name)
> > +{
> > +   dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
> > +   struct vring_packed_desc_event *driver, *device;
> > +   size_t ring_size_in_bytes, event_size_in_bytes;
> > +   struct vring_packed_desc *ring;
> > +   struct vring_virtqueue *vq;
> > +
> > +   if (vring_alloc_queue_packed(vdev, num, &ring, &driver, &device,
> > +&ring_dma_addr, &driver_event_dma_addr,
> > +&device_event_dma_addr,
> > +&ring_size_in_bytes,
> > +&event_size_in_bytes))
> > +   goto err_ring;
> > +
> > vq = kmalloc(sizeof(*vq), GFP_KERNEL);
> > if (!vq)
> > goto err_vq;
> > @@ -1939,9 +1983,7 @@ static struct virtqueue 
> > *vring_create_virtqueue_packed(
> > kfree(vq);
> >   err_vq:
> > vring_free_queue(vdev, event_size_in_bytes, device, 
> > device_event_dma_addr);
> > -err_device:
> > 

Re: [PATCH v9 11/32] virtio_ring: split: introduce virtqueue_resize_split()

2022-04-12 Thread Xuan Zhuo
On Tue, 12 Apr 2022 13:53:44 +0800, Jason Wang  wrote:
>
> 在 2022/4/6 上午11:43, Xuan Zhuo 写道:
> > virtio ring split supports resize.
> >
> > Only after the new vring is successfully allocated based on the new num,
> > we will release the old vring. In any case, an error is returned,
> > indicating that the vring still points to the old vring.
> >
> > In the case of an error, the caller must
> > re-initialize(virtqueue_reinit_split()) the virtqueue to ensure that the
> > vring can be used.
> >
> > In addition, vring_align, may_reduce_num are necessary for reallocating
> > vring, so they are retained for creating vq.
> >
> > Signed-off-by: Xuan Zhuo 
> > ---
> >   drivers/virtio/virtio_ring.c | 47 
> >   1 file changed, 47 insertions(+)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 3dc6ace2ba7a..33864134a744 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -139,6 +139,12 @@ struct vring_virtqueue {
> > /* DMA address and size information */
> > dma_addr_t queue_dma_addr;
> > size_t queue_size_in_bytes;
> > +
> > +   /* The parameters for creating vrings are reserved for
> > +* creating new vring.
> > +*/
> > +   u32 vring_align;
> > +   bool may_reduce_num;
> > } split;
> >
> > /* Available for packed ring */
> > @@ -199,6 +205,7 @@ struct vring_virtqueue {
> >   };
> >
> >   static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
> > +static void vring_free(struct virtqueue *_vq);
> >
> >   /*
> >* Helpers.
> > @@ -1088,6 +1095,8 @@ static struct virtqueue *vring_create_virtqueue_split(
> > return NULL;
> > }
> >
> > +   to_vvq(vq)->split.vring_align = vring_align;
> > +   to_vvq(vq)->split.may_reduce_num = may_reduce_num;
>
>
> It looks to me the above should belong to patch 6.

patch 6 just extracts a function, no logical modification.

to_vvq(vq)->split.may_reduce_num is newly added, so I don't think it should be
merged into patch 6.

>
>
> > to_vvq(vq)->split.queue_dma_addr = dma_addr;
> > to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
> > to_vvq(vq)->we_own_ring = true;
> > @@ -1095,6 +1104,44 @@ static struct virtqueue 
> > *vring_create_virtqueue_split(
> > return vq;
> >   }
> >
> > +static int virtqueue_resize_split(struct virtqueue *_vq, u32 num)
> > +{
> > +   struct vring_virtqueue *vq = to_vvq(_vq);
> > +   struct virtio_device *vdev = _vq->vdev;
> > +   struct vring_desc_state_split *state;
> > +   struct vring_desc_extra *extra;
> > +   size_t queue_size_in_bytes;
> > +   dma_addr_t dma_addr;
> > +   struct vring vring;
> > +   int err = -ENOMEM;
> > +   void *queue;
> > +
> > +   queue = vring_alloc_queue_split(vdev, &dma_addr, &num,
> > +   vq->split.vring_align,
> > +   vq->weak_barriers,
> > +   vq->split.may_reduce_num);
> > +   if (!queue)
> > +   return -ENOMEM;
> > +
> > +   queue_size_in_bytes = vring_size(num, vq->split.vring_align);
> > +
> > +   err = vring_alloc_state_extra_split(num, &state, &extra);
> > +   if (err) {
> > +   vring_free_queue(vdev, queue_size_in_bytes, queue, dma_addr);
> > +   return -ENOMEM;
> > +   }
> > +
> > +   vring_free(&vq->vq);
> > +
> > +   vring_init(&vring, num, queue, vq->split.vring_align);
> > +   vring_virtqueue_attach_split(vq, vring, state, extra);
> > +   vq->split.queue_dma_addr = dma_addr;
> > +   vq->split.queue_size_in_bytes = queue_size_in_bytes;
>
>
> I wonder if it's better to move the above assignments to
> vring_virtqueue_attach_split().

I also think so, the reason for not doing this is that there is no dma_addr and
queue_size_in_bytes when vring_virtqueue_attach_split is called in
__vring_new_virtqueue.

As discussed in patch 12, we can pass the struct struct vring_virtqueue_split to
vring_virtqueue_attach_split(). This is much more convenient.

Thanks.

>
> Other looks good.
>
> Thanks
>
>
> > +
> > +   vring_virtqueue_init_split(vq, vdev, true);
> > +   return 0;
> > +}
> > +
> >
> >   /*
> >* Packed ring specific functions - *_packed().
>
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH V2 4/5] virtio-pci: implement synchronize_vqs()

2022-04-12 Thread Cornelia Huck
On Wed, Apr 13 2022, Jason Wang  wrote:

> On Wed, Apr 13, 2022 at 12:49 AM Cornelia Huck  wrote:
>>
>> On Tue, Apr 12 2022, Halil Pasic  wrote:
>>
>> > On Mon, 11 Apr 2022 16:27:41 +0200
>> > Cornelia Huck  wrote:
>>
>> >> My main concern is that we would need to synchronize against a single
>> >> interrupt that covers all kinds of I/O interrupts, not just a single
>> >> device...
>> >>
>> >
>> > Could we synchronize on struct airq_info's lock member? If we were
>> > to grab all of these that might be involved...
>>
>> Hm, that could possibly narrow the sync down to a subset, which seems
>> better. For devices still using classic interrupts, per-device sync
>> would be easy.
>>
>> >
>> > AFAIU for the synchronize implementation we need a lock or a set of locks
>> > that contain all the possible vring_interrupt() calls with the queuues
>> > that belong to the given device as a critical section. That way, one
>> > has the acquire's and release's in place so that the vrign_interrupt()
>> > either guaranteed to finish before the change of driver_ready is
>> > guaranteed to be complete, or it is guaranteed to see the change.
>> >
>> > In any case, I guess we should first get clear on the first part. I.e.
>> > when do we want to allow host->guest notifications.
>>
>> Also, whether we just care about vring interrupts, or general device
>> interrupts (not sure if a config change interrupt may also trigger
>> things we do not want to trigger?)
>
> I think only vring interrupts, since the config interrupt hardening is
> done via 22b7050a024d7 ("virtio: defer config changed notifications")

Ah thanks, I even reviewed that one back then :)

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v9 08/32] virtio_ring: split: extract the logic of attach vring

2022-04-12 Thread Xuan Zhuo
On Tue, 12 Apr 2022 11:31:08 +0800, Jason Wang  wrote:
>
> 在 2022/4/6 上午11:43, Xuan Zhuo 写道:
> > Separate the logic of attach vring, subsequent patches will call it
> > separately.
> >
> > Signed-off-by: Xuan Zhuo 
> > ---
> >   drivers/virtio/virtio_ring.c | 20 ++--
> >   1 file changed, 14 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 6de67439cb57..083f2992ba0d 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -916,6 +916,19 @@ static void *virtqueue_detach_unused_buf_split(struct 
> > virtqueue *_vq)
> > return NULL;
> >   }
> >
> > +static void vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> > +struct vring vring,
> > +struct vring_desc_state_split 
> > *desc_state,
> > +struct vring_desc_extra *desc_extra)
> > +{
> > +   vq->split.vring = vring;
> > +   vq->split.queue_dma_addr = 0;
> > +   vq->split.queue_size_in_bytes = 0;
>
>
> Any reason to add the above two assignment in attach? It seems belong to
> free or reset.

As discussed in patch 11, since there is no dma_addr in __vring_new_virtqueue(),
the corresponding vq->split.queue_dma_addr cannot be set, so the purpose here
is just to initialize it.

In the next version, struct vring_virtqueue_split will be passed to
vring_virtqueue_attach_split() to make the logic here look more reasonable.

Thanks.


>
> Thanks
>
>
> > +
> > +   vq->split.desc_state = desc_state;
> > +   vq->split.desc_extra = desc_extra;
> > +}
> > +
> >   static int vring_alloc_state_extra_split(u32 num,
> >  struct vring_desc_state_split 
> > **desc_state,
> >  struct vring_desc_extra **desc_extra)
> > @@ -2262,10 +2275,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int 
> > index,
> > if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM))
> > vq->weak_barriers = false;
> >
> > -   vq->split.queue_dma_addr = 0;
> > -   vq->split.queue_size_in_bytes = 0;
> > -
> > -   vq->split.vring = vring;
> > vq->split.avail_flags_shadow = 0;
> > vq->split.avail_idx_shadow = 0;
> >
> > @@ -2283,8 +2292,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int 
> > index,
> > return NULL;
> > }
> >
> > -   vq->split.desc_state = state;
> > -   vq->split.desc_extra = extra;
> > +   vring_virtqueue_attach_split(vq, vring, state, extra);
> >
> > /* Put everything in free lists. */
> > vq->free_head = 0;
>
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v9 07/32] virtio_ring: split: extract the logic of alloc state and extra

2022-04-12 Thread Xuan Zhuo
On Tue, 12 Apr 2022 11:26:49 +0800, Jason Wang  wrote:
>
> 在 2022/4/6 上午11:43, Xuan Zhuo 写道:
> > Separate the logic of creating desc_state, desc_extra, and subsequent
> > patches will call it independently.
> >
> > Signed-off-by: Xuan Zhuo 
> > ---
> >   drivers/virtio/virtio_ring.c | 53 ++--
> >   1 file changed, 38 insertions(+), 15 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 72d5ae063fa0..6de67439cb57 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -198,6 +198,7 @@ struct vring_virtqueue {
> >   #endif
> >   };
> >
> > +static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num);
> >
> >   /*
> >* Helpers.
> > @@ -915,6 +916,33 @@ static void *virtqueue_detach_unused_buf_split(struct 
> > virtqueue *_vq)
> > return NULL;
> >   }
> >
> > +static int vring_alloc_state_extra_split(u32 num,
> > +struct vring_desc_state_split 
> > **desc_state,
> > +struct vring_desc_extra **desc_extra)
> > +{
> > +   struct vring_desc_state_split *state;
> > +   struct vring_desc_extra *extra;
> > +
> > +   state = kmalloc_array(num, sizeof(struct vring_desc_state_split), 
> > GFP_KERNEL);
> > +   if (!state)
> > +   goto err_state;
> > +
> > +   extra = vring_alloc_desc_extra(num);
> > +   if (!extra)
> > +   goto err_extra;
> > +
> > +   memset(state, 0, num * sizeof(struct vring_desc_state_split));
> > +
> > +   *desc_state = state;
> > +   *desc_extra = extra;
> > +   return 0;
> > +
> > +err_extra:
> > +   kfree(state);
> > +err_state:
> > +   return -ENOMEM;
> > +}
> > +
> >   static void *vring_alloc_queue_split(struct virtio_device *vdev,
> >  dma_addr_t *dma_addr,
> >  u32 *n,
> > @@ -2196,7 +2224,10 @@ struct virtqueue *__vring_new_virtqueue(unsigned int 
> > index,
> > void (*callback)(struct virtqueue *),
> > const char *name)
> >   {
> > +   struct vring_desc_state_split *state;
> > +   struct vring_desc_extra *extra;
> > struct vring_virtqueue *vq;
> > +   int err;
> >
> > if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
> > return NULL;
> > @@ -2246,30 +2277,22 @@ struct virtqueue *__vring_new_virtqueue(unsigned 
> > int index,
> > vq->split.avail_flags_shadow);
> > }
> >
> > -   vq->split.desc_state = kmalloc_array(vring.num,
> > -   sizeof(struct vring_desc_state_split), GFP_KERNEL);
> > -   if (!vq->split.desc_state)
> > -   goto err_state;
> > +   err = vring_alloc_state_extra_split(vring.num, &state, &extra);
>
>
> Nit: we can pass e.g &vq->split.desc_state here to avoid extra temp
> variable and assignment.

The reason for not doing this is that when we implement resize later, when we
call vring_alloc_state_extra_split(), we want to keep the old desc_state, and
desc_extra because we want to release them.

As discussed in patch 11, 12, struct vring_virtqueue_split will optimize this
logic.

Thanks.

>
> Other looks good.
>
> Thanks
>
>
> > +   if (err) {
> > +   kfree(vq);
> > +   return NULL;
> > +   }
> >
> > -   vq->split.desc_extra = vring_alloc_desc_extra(vring.num);
> > -   if (!vq->split.desc_extra)
> > -   goto err_extra;
> > +   vq->split.desc_state = state;
> > +   vq->split.desc_extra = extra;
> >
> > /* Put everything in free lists. */
> > vq->free_head = 0;
> > -   memset(vq->split.desc_state, 0, vring.num *
> > -   sizeof(struct vring_desc_state_split));
> >
> > spin_lock(&vdev->vqs_list_lock);
> > list_add_tail(&vq->vq.list, &vdev->vqs);
> > spin_unlock(&vdev->vqs_list_lock);
> > return &vq->vq;
> > -
> > -err_extra:
> > -   kfree(vq->split.desc_state);
> > -err_state:
> > -   kfree(vq);
> > -   return NULL;
> >   }
> >   EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
> >
>
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH v9 06/32] virtio_ring: split: extract the logic of alloc queue

2022-04-12 Thread Xuan Zhuo
On Tue, 12 Apr 2022 11:22:33 +0800, Jason Wang  wrote:
>
> 在 2022/4/6 上午11:43, Xuan Zhuo 写道:
> > Separate the logic of split to create vring queue.
> >
> > This feature is required for subsequent virtuqueue reset vring.
> >
> > Signed-off-by: Xuan Zhuo 
> > ---
> >   drivers/virtio/virtio_ring.c | 53 
> >   1 file changed, 36 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 33fddfb907a6..72d5ae063fa0 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -915,23 +915,15 @@ static void *virtqueue_detach_unused_buf_split(struct 
> > virtqueue *_vq)
> > return NULL;
> >   }
> >
> > -static struct virtqueue *vring_create_virtqueue_split(
> > -   unsigned int index,
> > -   unsigned int num,
> > -   unsigned int vring_align,
> > -   struct virtio_device *vdev,
> > -   bool weak_barriers,
> > -   bool may_reduce_num,
> > -   bool context,
> > -   bool (*notify)(struct virtqueue *),
> > -   void (*callback)(struct virtqueue *),
> > -   const char *name)
> > +static void *vring_alloc_queue_split(struct virtio_device *vdev,
> > +dma_addr_t *dma_addr,
> > +u32 *n,
> > +unsigned int vring_align,
> > +bool weak_barriers,
>
>
> This is not used in this function.

The next version will fix it.

Thanks.


>
> Thanks
>
>
> > +bool may_reduce_num)
> >   {
> > -   struct virtqueue *vq;
> > void *queue = NULL;
> > -   dma_addr_t dma_addr;
> > -   size_t queue_size_in_bytes;
> > -   struct vring vring;
> > +   u32 num = *n;
> >
> > /* We assume num is a power of 2. */
> > if (num & (num - 1)) {
> > @@ -942,7 +934,7 @@ static struct virtqueue *vring_create_virtqueue_split(
> > /* TODO: allocate each queue chunk individually */
> > for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
> > queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
> > - &dma_addr,
> > + dma_addr,
> >   GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
> > if (queue)
> > break;
> > @@ -956,11 +948,38 @@ static struct virtqueue *vring_create_virtqueue_split(
> > if (!queue) {
> > /* Try to get a single page. You are my only hope! */
> > queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
> > - &dma_addr, GFP_KERNEL|__GFP_ZERO);
> > + dma_addr, GFP_KERNEL|__GFP_ZERO);
> > }
> > if (!queue)
> > return NULL;
> >
> > +   *n = num;
> > +   return queue;
> > +}
> > +
> > +static struct virtqueue *vring_create_virtqueue_split(
> > +   unsigned int index,
> > +   unsigned int num,
> > +   unsigned int vring_align,
> > +   struct virtio_device *vdev,
> > +   bool weak_barriers,
> > +   bool may_reduce_num,
> > +   bool context,
> > +   bool (*notify)(struct virtqueue *),
> > +   void (*callback)(struct virtqueue *),
> > +   const char *name)
> > +{
> > +   size_t queue_size_in_bytes;
> > +   struct virtqueue *vq;
> > +   dma_addr_t dma_addr;
> > +   struct vring vring;
> > +   void *queue;
> > +
> > +   queue = vring_alloc_queue_split(vdev, &dma_addr, &num, vring_align,
> > +   weak_barriers, may_reduce_num);
> > +   if (!queue)
> > +   return NULL;
> > +
> > queue_size_in_bytes = vring_size(num, vring_align);
> > vring_init(&vring, num, queue, vring_align);
> >
>
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization