Re: [PATCH v4 1/1] oslib-posix: initialize backend memory objects in parallel

2024-02-04 Thread David Hildenbrand

On 03.02.24 23:43, Dongli Zhang wrote:



On 1/31/24 08:53, Mark Kanda wrote:

QEMU initializes preallocated backend memory as the objects are parsed from
the command line. This is not optimal in some cases (e.g. memory spanning
multiple NUMA nodes) because the memory objects are initialized in series.

Allow the initialization to occur in parallel (asynchronously). In order to
ensure optimal thread placement, asynchronous initialization requires prealloc
context threads to be in use.

Signed-off-by: Mark Kanda 
Signed-off-by: David Hildenbrand 
---
  backends/hostmem.c |   7 ++-
  hw/virtio/virtio-mem.c |   4 +-
  include/hw/qdev-core.h |   5 ++
  include/qemu/osdep.h   |  18 +-
  system/vl.c|   9 +++
  util/oslib-posix.c | 131 +++--
  util/oslib-win32.c |   8 ++-
  7 files changed, 145 insertions(+), 37 deletions(-)

diff --git a/backends/hostmem.c b/backends/hostmem.c
index 30f69b2cb5..17221e422a 100644
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -20,6 +20,7 @@
  #include "qom/object_interfaces.h"
  #include "qemu/mmap-alloc.h"
  #include "qemu/madvise.h"
+#include "hw/qdev-core.h"
  
  #ifdef CONFIG_NUMA

  #include <numaif.h>
@@ -237,7 +238,7 @@ static void host_memory_backend_set_prealloc(Object *obj, 
bool value,
  uint64_t sz = memory_region_size(&backend->mr);
  
  if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,

-   backend->prealloc_context, errp)) {
+   backend->prealloc_context, false, errp)) {
  return;
  }
  backend->prealloc = true;
@@ -323,6 +324,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, 
Error **errp)
  HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
  void *ptr;
  uint64_t sz;
+bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED);
  
  if (!bc->alloc) {

  return;
@@ -398,7 +400,8 @@ host_memory_backend_memory_complete(UserCreatable *uc, 
Error **errp)
  if (backend->prealloc && 
!qemu_prealloc_mem(memory_region_get_fd(&backend->mr),
  ptr, sz,
  backend->prealloc_threads,
-backend->prealloc_context, 
errp)) {
+backend->prealloc_context,
+async, errp)) {
  return;
  }
  }
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index 99ab989852..ffd119ebac 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -605,7 +605,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, 
uint64_t start_gpa,
  int fd = memory_region_get_fd(&vmem->memdev->mr);
  Error *local_err = NULL;
  
-if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) {

+if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) {
  static bool warned;
  
  /*

@@ -1248,7 +1248,7 @@ static int virtio_mem_prealloc_range_cb(VirtIOMEM *vmem, 
void *arg,
  int fd = memory_region_get_fd(&vmem->memdev->mr);
  Error *local_err = NULL;
  
-if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) {

+if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) {
  error_report_err(local_err);
  return -ENOMEM;
  }
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 151d968238..83dd9e2485 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -1071,6 +1071,11 @@ typedef enum MachineInitPhase {
   */
  PHASE_ACCEL_CREATED,
  
+/*

+ * Late backend objects have been created and initialized.
+ */
+PHASE_LATE_BACKENDS_CREATED,
+
  /*
   * machine_class->init has been called, thus creating any embedded
   * devices and validating machine properties.  Devices created at
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index c9692cc314..7d359dabc4 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -680,6 +680,8 @@ typedef struct ThreadContext ThreadContext;
   * @area: start address of the are to preallocate
   * @sz: the size of the area to preallocate
   * @max_threads: maximum number of threads to use
+ * @tc: prealloc context threads pointer, NULL if not in use
+ * @async: request asynchronous preallocation, requires @tc
   * @errp: returns an error if this function fails
   *
   * Preallocate memory (populate/prefault page tables writable) for the virtual
@@ -687,10 +689,24 @@ typedef struct ThreadContext ThreadContext;
   * each page in the area was faulted in writable at least once, for example,
   * after allocating file blocks for mapped files.
   *
+ * When setting @async, allocation might be performed asynchronously.
+ * qemu_finish_async_prealloc_mem() must be called to finish any asynchronous
+ * preallocation.
+ *
   * Return: true on success, else false setting @errp with 

Re: [PATCH v4 1/1] oslib-posix: initialize backend memory objects in parallel

2024-02-03 Thread Dongli Zhang



On 1/31/24 08:53, Mark Kanda wrote:
> QEMU initializes preallocated backend memory as the objects are parsed from
> the command line. This is not optimal in some cases (e.g. memory spanning
> multiple NUMA nodes) because the memory objects are initialized in series.
> 
> Allow the initialization to occur in parallel (asynchronously). In order to
> ensure optimal thread placement, asynchronous initialization requires prealloc
> context threads to be in use.
> 
> Signed-off-by: Mark Kanda 
> Signed-off-by: David Hildenbrand 
> ---
>  backends/hostmem.c |   7 ++-
>  hw/virtio/virtio-mem.c |   4 +-
>  include/hw/qdev-core.h |   5 ++
>  include/qemu/osdep.h   |  18 +-
>  system/vl.c|   9 +++
>  util/oslib-posix.c | 131 +++--
>  util/oslib-win32.c |   8 ++-
>  7 files changed, 145 insertions(+), 37 deletions(-)
> 
> diff --git a/backends/hostmem.c b/backends/hostmem.c
> index 30f69b2cb5..17221e422a 100644
> --- a/backends/hostmem.c
> +++ b/backends/hostmem.c
> @@ -20,6 +20,7 @@
>  #include "qom/object_interfaces.h"
>  #include "qemu/mmap-alloc.h"
>  #include "qemu/madvise.h"
> +#include "hw/qdev-core.h"
>  
>  #ifdef CONFIG_NUMA
>  #include <numaif.h>
> @@ -237,7 +238,7 @@ static void host_memory_backend_set_prealloc(Object *obj, 
> bool value,
>  uint64_t sz = memory_region_size(&backend->mr);
>  
>  if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
> -   backend->prealloc_context, errp)) {
> +   backend->prealloc_context, false, errp)) {
>  return;
>  }
>  backend->prealloc = true;
> @@ -323,6 +324,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, 
> Error **errp)
>  HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
>  void *ptr;
>  uint64_t sz;
> +bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED);
>  
>  if (!bc->alloc) {
>  return;
> @@ -398,7 +400,8 @@ host_memory_backend_memory_complete(UserCreatable *uc, 
> Error **errp)
>  if (backend->prealloc && 
> !qemu_prealloc_mem(memory_region_get_fd(&backend->mr),
>  ptr, sz,
>  backend->prealloc_threads,
> -backend->prealloc_context, 
> errp)) {
> +backend->prealloc_context,
> +async, errp)) {
>  return;
>  }
>  }
> diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
> index 99ab989852..ffd119ebac 100644
> --- a/hw/virtio/virtio-mem.c
> +++ b/hw/virtio/virtio-mem.c
> @@ -605,7 +605,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, 
> uint64_t start_gpa,
>  int fd = memory_region_get_fd(&vmem->memdev->mr);
>  Error *local_err = NULL;
>  
> -if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) {
> +if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) {
>  static bool warned;
>  
>  /*
> @@ -1248,7 +1248,7 @@ static int virtio_mem_prealloc_range_cb(VirtIOMEM 
> *vmem, void *arg,
>  int fd = memory_region_get_fd(&vmem->memdev->mr);
>  Error *local_err = NULL;
>  
> -if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) {
> +if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) {
>  error_report_err(local_err);
>  return -ENOMEM;
>  }
> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
> index 151d968238..83dd9e2485 100644
> --- a/include/hw/qdev-core.h
> +++ b/include/hw/qdev-core.h
> @@ -1071,6 +1071,11 @@ typedef enum MachineInitPhase {
>   */
>  PHASE_ACCEL_CREATED,
>  
> +/*
> + * Late backend objects have been created and initialized.
> + */
> +PHASE_LATE_BACKENDS_CREATED,
> +
>  /*
>   * machine_class->init has been called, thus creating any embedded
>   * devices and validating machine properties.  Devices created at
> diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
> index c9692cc314..7d359dabc4 100644
> --- a/include/qemu/osdep.h
> +++ b/include/qemu/osdep.h
> @@ -680,6 +680,8 @@ typedef struct ThreadContext ThreadContext;
>   * @area: start address of the are to preallocate
>   * @sz: the size of the area to preallocate
>   * @max_threads: maximum number of threads to use
> + * @tc: prealloc context threads pointer, NULL if not in use
> + * @async: request asynchronous preallocation, requires @tc
>   * @errp: returns an error if this function fails
>   *
>   * Preallocate memory (populate/prefault page tables writable) for the 
> virtual
> @@ -687,10 +689,24 @@ typedef struct ThreadContext ThreadContext;
>   * each page in the area was faulted in writable at least once, for example,
>   * after allocating file blocks for mapped files.
>   *
> + * When setting @async, allocation might be performed asynchronously.
> + * 

Re: [PATCH v4 1/1] oslib-posix: initialize backend memory objects in parallel

2024-02-02 Thread Mario Casquero
This patch has been successfully tested by QE. After configuring two
memory-backends with preallocation context objects, bound to two host
nodes, QEMU is at least three times faster than before.

# time /usr/libexec/qemu-kvm -M q35 -m 16G,maxmem=32G -numa
node,memdev=mem0,nodeid=0 -numa node,memdev=mem1,nodeid=1 -object
thread-context,id=tc1,node-affinity=0 -object
thread-context,id=tc2,node-affinity=1 -object
memory-backend-ram,id=mem0,size=8G,policy=bind,host-nodes=0 -object
memory-backend-ram,id=mem1,size=8G,policy=bind,host-nodes=1 -nographic
-monitor stdio
QEMU 8.2.0 monitor - type 'help' for more information
qemu-kvm: cannot use stdio by multiple character devices
qemu-kvm: could not connect serial device to character backend 'stdio'

real 0m0.038s
user 0m0.013s
sys 0m0.005s

# time /home/qemu/build/qemu-system-x86_64 -M q35 -m 16G,maxmem=32G
-numa node,memdev=mem0,nodeid=0 -numa node,memdev=mem1,nodeid=1
-object thread-context,id=tc1,node-affinity=0 -object
thread-context,id=tc2,node-affinity=1 -object
memory-backend-ram,id=mem0,size=8G,policy=bind,host-nodes=0 -object
memory-backend-ram,id=mem1,size=8G,policy=bind,host-nodes=1 -nographic
-monitor stdio
QEMU 8.2.50 monitor - type 'help' for more information
qemu-system-x86_64: cannot use stdio by multiple character devices
qemu-system-x86_64: could not connect serial device to character backend 'stdio'

real 0m0.012s
user 0m0.006s
sys 0m0.007s

Tested-by: Mario Casquero 


On Wed, Jan 31, 2024 at 7:24 PM David Hildenbrand  wrote:
>
> On 31.01.24 17:53, Mark Kanda wrote:
> > QEMU initializes preallocated backend memory as the objects are parsed from
> > the command line. This is not optimal in some cases (e.g. memory spanning
> > multiple NUMA nodes) because the memory objects are initialized in series.
> >
> > Allow the initialization to occur in parallel (asynchronously). In order to
> > ensure optimal thread placement, asynchronous initialization requires 
> > prealloc
> > context threads to be in use.
> >
> > Signed-off-by: Mark Kanda 
> > Signed-off-by: David Hildenbrand 
> > ---
>
> So, this LGTM. There might be ways to not rely on phases to achieve what
> we want to achieve (e.g., let the machine set an internal property on
> memory backends we create from the cmdline), but this should do as well.
>
> I'll wait a bit for more feedback. If there is none, I'll route this
> through my tree (after doing a quick sanity test).
>
> Thanks!
>
> --
> Cheers,
>
> David / dhildenb
>
>




Re: [PATCH v4 1/1] oslib-posix: initialize backend memory objects in parallel

2024-01-31 Thread David Hildenbrand

On 31.01.24 17:53, Mark Kanda wrote:

QEMU initializes preallocated backend memory as the objects are parsed from
the command line. This is not optimal in some cases (e.g. memory spanning
multiple NUMA nodes) because the memory objects are initialized in series.

Allow the initialization to occur in parallel (asynchronously). In order to
ensure optimal thread placement, asynchronous initialization requires prealloc
context threads to be in use.

Signed-off-by: Mark Kanda 
Signed-off-by: David Hildenbrand 
---


So, this LGTM. There might be ways to not rely on phases to achieve what 
we want to achieve (e.g., let the machine set an internal property on 
memory backends we create from the cmdline), but this should do as well.


I'll wait a bit for more feedback. If there is none, I'll route this 
through my tree (after doing a quick sanity test).


Thanks!

--
Cheers,

David / dhildenb




[PATCH v4 1/1] oslib-posix: initialize backend memory objects in parallel

2024-01-31 Thread Mark Kanda
QEMU initializes preallocated backend memory as the objects are parsed from
the command line. This is not optimal in some cases (e.g. memory spanning
multiple NUMA nodes) because the memory objects are initialized in series.

Allow the initialization to occur in parallel (asynchronously). In order to
ensure optimal thread placement, asynchronous initialization requires prealloc
context threads to be in use.

Signed-off-by: Mark Kanda 
Signed-off-by: David Hildenbrand 
---
 backends/hostmem.c |   7 ++-
 hw/virtio/virtio-mem.c |   4 +-
 include/hw/qdev-core.h |   5 ++
 include/qemu/osdep.h   |  18 +-
 system/vl.c|   9 +++
 util/oslib-posix.c | 131 +++--
 util/oslib-win32.c |   8 ++-
 7 files changed, 145 insertions(+), 37 deletions(-)

diff --git a/backends/hostmem.c b/backends/hostmem.c
index 30f69b2cb5..17221e422a 100644
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -20,6 +20,7 @@
 #include "qom/object_interfaces.h"
 #include "qemu/mmap-alloc.h"
 #include "qemu/madvise.h"
+#include "hw/qdev-core.h"
 
 #ifdef CONFIG_NUMA
 #include <numaif.h>
@@ -237,7 +238,7 @@ static void host_memory_backend_set_prealloc(Object *obj, 
bool value,
 uint64_t sz = memory_region_size(&backend->mr);
 
 if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
-   backend->prealloc_context, errp)) {
+   backend->prealloc_context, false, errp)) {
 return;
 }
 backend->prealloc = true;
@@ -323,6 +324,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, 
Error **errp)
 HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
 void *ptr;
 uint64_t sz;
+bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED);
 
 if (!bc->alloc) {
 return;
@@ -398,7 +400,8 @@ host_memory_backend_memory_complete(UserCreatable *uc, 
Error **errp)
 if (backend->prealloc && 
!qemu_prealloc_mem(memory_region_get_fd(&backend->mr),
 ptr, sz,
 backend->prealloc_threads,
-backend->prealloc_context, 
errp)) {
+backend->prealloc_context,
+async, errp)) {
 return;
 }
 }
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index 99ab989852..ffd119ebac 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -605,7 +605,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, 
uint64_t start_gpa,
 int fd = memory_region_get_fd(&vmem->memdev->mr);
 Error *local_err = NULL;
 
-if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) {
+if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) {
 static bool warned;
 
 /*
@@ -1248,7 +1248,7 @@ static int virtio_mem_prealloc_range_cb(VirtIOMEM *vmem, 
void *arg,
 int fd = memory_region_get_fd(&vmem->memdev->mr);
 Error *local_err = NULL;
 
-if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) {
+if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) {
 error_report_err(local_err);
 return -ENOMEM;
 }
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 151d968238..83dd9e2485 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -1071,6 +1071,11 @@ typedef enum MachineInitPhase {
  */
 PHASE_ACCEL_CREATED,
 
+/*
+ * Late backend objects have been created and initialized.
+ */
+PHASE_LATE_BACKENDS_CREATED,
+
 /*
  * machine_class->init has been called, thus creating any embedded
  * devices and validating machine properties.  Devices created at
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index c9692cc314..7d359dabc4 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -680,6 +680,8 @@ typedef struct ThreadContext ThreadContext;
  * @area: start address of the are to preallocate
  * @sz: the size of the area to preallocate
  * @max_threads: maximum number of threads to use
+ * @tc: prealloc context threads pointer, NULL if not in use
+ * @async: request asynchronous preallocation, requires @tc
  * @errp: returns an error if this function fails
  *
  * Preallocate memory (populate/prefault page tables writable) for the virtual
@@ -687,10 +689,24 @@ typedef struct ThreadContext ThreadContext;
  * each page in the area was faulted in writable at least once, for example,
  * after allocating file blocks for mapped files.
  *
+ * When setting @async, allocation might be performed asynchronously.
+ * qemu_finish_async_prealloc_mem() must be called to finish any asynchronous
+ * preallocation.
+ *
  * Return: true on success, else false setting @errp with error.
  */
 bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
-   ThreadContext *tc, Error **errp);
+