Re: [PATCH v4 1/1] oslib-posix: initialize backend memory objects in parallel
On 03.02.24 23:43, Dongli Zhang wrote: On 1/31/24 08:53, Mark Kanda wrote: QEMU initializes preallocated backend memory as the objects are parsed from the command line. This is not optimal in some cases (e.g. memory spanning multiple NUMA nodes) because the memory objects are initialized in series. Allow the initialization to occur in parallel (asynchronously). In order to ensure optimal thread placement, asynchronous initialization requires prealloc context threads to be in use. Signed-off-by: Mark Kanda Signed-off-by: David Hildenbrand --- backends/hostmem.c | 7 ++- hw/virtio/virtio-mem.c | 4 +- include/hw/qdev-core.h | 5 ++ include/qemu/osdep.h | 18 +- system/vl.c | 9 +++ util/oslib-posix.c | 131 +++-- util/oslib-win32.c | 8 ++- 7 files changed, 145 insertions(+), 37 deletions(-) diff --git a/backends/hostmem.c b/backends/hostmem.c index 30f69b2cb5..17221e422a 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -20,6 +20,7 @@ #include "qom/object_interfaces.h" #include "qemu/mmap-alloc.h" #include "qemu/madvise.h" +#include "hw/qdev-core.h" #ifdef CONFIG_NUMA #include <numaif.h> @@ -237,7 +238,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value, uint64_t sz = memory_region_size(&backend->mr); if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads, - backend->prealloc_context, errp)) { + backend->prealloc_context, false, errp)) { return; } backend->prealloc = true; @@ -323,6 +324,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); void *ptr; uint64_t sz; +bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED); if (!bc->alloc) { return; @@ -398,7 +400,8 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) if (backend->prealloc && !qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz, backend->prealloc_threads, -backend->prealloc_context, errp)) { +backend->prealloc_context, +async, errp)) { return; } } diff --git a/hw/virtio/virtio-mem.c 
b/hw/virtio/virtio-mem.c index 99ab989852..ffd119ebac 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -605,7 +605,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, int fd = memory_region_get_fd(&vmem->memdev->mr); Error *local_err = NULL; -if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) { +if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) { static bool warned; /* @@ -1248,7 +1248,7 @@ static int virtio_mem_prealloc_range_cb(VirtIOMEM *vmem, void *arg, int fd = memory_region_get_fd(&vmem->memdev->mr); Error *local_err = NULL; -if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) { +if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) { error_report_err(local_err); return -ENOMEM; } diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 151d968238..83dd9e2485 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -1071,6 +1071,11 @@ typedef enum MachineInitPhase { */ PHASE_ACCEL_CREATED, +/* + * Late backend objects have been created and initialized. + */ +PHASE_LATE_BACKENDS_CREATED, + /* * machine_class->init has been called, thus creating any embedded * devices and validating machine properties. 
Devices created at diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index c9692cc314..7d359dabc4 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -680,6 +680,8 @@ typedef struct ThreadContext ThreadContext; * @area: start address of the are to preallocate * @sz: the size of the area to preallocate * @max_threads: maximum number of threads to use + * @tc: prealloc context threads pointer, NULL if not in use + * @async: request asynchronous preallocation, requires @tc * @errp: returns an error if this function fails * * Preallocate memory (populate/prefault page tables writable) for the virtual @@ -687,10 +689,24 @@ typedef struct ThreadContext ThreadContext; * each page in the area was faulted in writable at least once, for example, * after allocating file blocks for mapped files. * + * When setting @async, allocation might be performed asynchronously. + * qemu_finish_async_prealloc_mem() must be called to finish any asynchronous + * preallocation. + * * Return: true on success, else false setting @errp with
Re: [PATCH v4 1/1] oslib-posix: initialize backend memory objects in parallel
On 1/31/24 08:53, Mark Kanda wrote: > QEMU initializes preallocated backend memory as the objects are parsed from > the command line. This is not optimal in some cases (e.g. memory spanning > multiple NUMA nodes) because the memory objects are initialized in series. > > Allow the initialization to occur in parallel (asynchronously). In order to > ensure optimal thread placement, asynchronous initialization requires prealloc > context threads to be in use. > > Signed-off-by: Mark Kanda > Signed-off-by: David Hildenbrand > --- > backends/hostmem.c | 7 ++- > hw/virtio/virtio-mem.c | 4 +- > include/hw/qdev-core.h | 5 ++ > include/qemu/osdep.h | 18 +- > system/vl.c | 9 +++ > util/oslib-posix.c | 131 +++-- > util/oslib-win32.c | 8 ++- > 7 files changed, 145 insertions(+), 37 deletions(-) > > diff --git a/backends/hostmem.c b/backends/hostmem.c > index 30f69b2cb5..17221e422a 100644 > --- a/backends/hostmem.c > +++ b/backends/hostmem.c > @@ -20,6 +20,7 @@ > #include "qom/object_interfaces.h" > #include "qemu/mmap-alloc.h" > #include "qemu/madvise.h" > +#include "hw/qdev-core.h" > > #ifdef CONFIG_NUMA > #include <numaif.h> > @@ -237,7 +238,7 @@ static void host_memory_backend_set_prealloc(Object *obj, > bool value, > uint64_t sz = memory_region_size(&backend->mr); > > if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads, > - backend->prealloc_context, errp)) { > + backend->prealloc_context, false, errp)) { > return; > } > backend->prealloc = true; > @@ -323,6 +324,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, > Error **errp) > HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); > void *ptr; > uint64_t sz; > +bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED); > > if (!bc->alloc) { > return; > @@ -398,7 +400,8 @@ host_memory_backend_memory_complete(UserCreatable *uc, > Error **errp) > if (backend->prealloc && > !qemu_prealloc_mem(memory_region_get_fd(&backend->mr), > ptr, sz, > backend->prealloc_threads, > -backend->prealloc_context, > errp)) { > 
+backend->prealloc_context, > +async, errp)) { > return; > } > } > diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c > index 99ab989852..ffd119ebac 100644 > --- a/hw/virtio/virtio-mem.c > +++ b/hw/virtio/virtio-mem.c > @@ -605,7 +605,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, > uint64_t start_gpa, > int fd = memory_region_get_fd(&vmem->memdev->mr); > Error *local_err = NULL; > > -if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) { > +if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) { > static bool warned; > > /* > @@ -1248,7 +1248,7 @@ static int virtio_mem_prealloc_range_cb(VirtIOMEM > *vmem, void *arg, > int fd = memory_region_get_fd(&vmem->memdev->mr); > Error *local_err = NULL; > > -if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) { > +if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) { > error_report_err(local_err); > return -ENOMEM; > } > diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h > index 151d968238..83dd9e2485 100644 > --- a/include/hw/qdev-core.h > +++ b/include/hw/qdev-core.h > @@ -1071,6 +1071,11 @@ typedef enum MachineInitPhase { > */ > PHASE_ACCEL_CREATED, > > +/* > + * Late backend objects have been created and initialized. > + */ > +PHASE_LATE_BACKENDS_CREATED, > + > /* > * machine_class->init has been called, thus creating any embedded > * devices and validating machine properties. 
Devices created at > diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h > index c9692cc314..7d359dabc4 100644 > --- a/include/qemu/osdep.h > +++ b/include/qemu/osdep.h > @@ -680,6 +680,8 @@ typedef struct ThreadContext ThreadContext; > * @area: start address of the are to preallocate > * @sz: the size of the area to preallocate > * @max_threads: maximum number of threads to use > + * @tc: prealloc context threads pointer, NULL if not in use > + * @async: request asynchronous preallocation, requires @tc > * @errp: returns an error if this function fails > * > * Preallocate memory (populate/prefault page tables writable) for the > virtual > @@ -687,10 +689,24 @@ typedef struct ThreadContext ThreadContext; > * each page in the area was faulted in writable at least once, for example, > * after allocating file blocks for mapped files. > * > + * When setting @async, allocation might be performed asynchronously. > + *
Re: [PATCH v4 1/1] oslib-posix: initialize backend memory objects in parallel
This patch has been successfully tested by QE. After configuring two memory backends with preallocation context objects, bound to two host nodes, QEMU is at least three times faster than before. # time /usr/libexec/qemu-kvm -M q35 -m 16G,maxmem=32G -numa node,memdev=mem0,nodeid=0 -numa node,memdev=mem1,nodeid=1 -object thread-context,id=tc1,node-affinity=0 -object thread-context,id=tc2,node-affinity=1 -object memory-backend-ram,id=mem0,size=8G,policy=bind,host-nodes=0 -object memory-backend-ram,id=mem1,size=8G,policy=bind,host-nodes=1 -nographic -monitor stdio QEMU 8.2.0 monitor - type 'help' for more information qemu-kvm: cannot use stdio by multiple character devices qemu-kvm: could not connect serial device to character backend 'stdio' real 0m0.038s user 0m0.013s sys 0m0.005s # time /home/qemu/build/qemu-system-x86_64 -M q35 -m 16G,maxmem=32G -numa node,memdev=mem0,nodeid=0 -numa node,memdev=mem1,nodeid=1 -object thread-context,id=tc1,node-affinity=0 -object thread-context,id=tc2,node-affinity=1 -object memory-backend-ram,id=mem0,size=8G,policy=bind,host-nodes=0 -object memory-backend-ram,id=mem1,size=8G,policy=bind,host-nodes=1 -nographic -monitor stdio QEMU 8.2.50 monitor - type 'help' for more information qemu-system-x86_64: cannot use stdio by multiple character devices qemu-system-x86_64: could not connect serial device to character backend 'stdio' real 0m0.012s user 0m0.006s sys 0m0.007s Tested-by: Mario Casquero On Wed, Jan 31, 2024 at 7:24 PM David Hildenbrand wrote: > > On 31.01.24 17:53, Mark Kanda wrote: > > QEMU initializes preallocated backend memory as the objects are parsed from > > the command line. This is not optimal in some cases (e.g. memory spanning > > multiple NUMA nodes) because the memory objects are initialized in series. > > > > Allow the initialization to occur in parallel (asynchronously). 
In order to > > ensure optimal thread placement, asynchronous initialization requires > > prealloc > > context threads to be in use. > > > > Signed-off-by: Mark Kanda > > Signed-off-by: David Hildenbrand > > --- > > So, this LGTM. There might be ways to not rely on phases to achieve what > we want to achieve (e.g., let the machine set an internal property on > memory backends we create from the cmdline), but this should do as well. > > I'll wait a bit for more feedback. If there is none, I'll route this > through my tree (after doing a quick sanity test). > > Thanks! > > -- > Cheers, > > David / dhildenb > >
Re: [PATCH v4 1/1] oslib-posix: initialize backend memory objects in parallel
On 31.01.24 17:53, Mark Kanda wrote: QEMU initializes preallocated backend memory as the objects are parsed from the command line. This is not optimal in some cases (e.g. memory spanning multiple NUMA nodes) because the memory objects are initialized in series. Allow the initialization to occur in parallel (asynchronously). In order to ensure optimal thread placement, asynchronous initialization requires prealloc context threads to be in use. Signed-off-by: Mark Kanda Signed-off-by: David Hildenbrand --- So, this LGTM. There might be ways to not rely on phases to achieve what we want to achieve (e.g., let the machine set an internal property on memory backends we create from the cmdline), but this should do as well. I'll wait a bit for more feedback. If there is none, I'll route this through my tree (after doing a quick sanity test). Thanks! -- Cheers, David / dhildenb
[PATCH v4 1/1] oslib-posix: initialize backend memory objects in parallel
QEMU initializes preallocated backend memory as the objects are parsed from the command line. This is not optimal in some cases (e.g. memory spanning multiple NUMA nodes) because the memory objects are initialized in series. Allow the initialization to occur in parallel (asynchronously). In order to ensure optimal thread placement, asynchronous initialization requires prealloc context threads to be in use. Signed-off-by: Mark Kanda Signed-off-by: David Hildenbrand --- backends/hostmem.c | 7 ++- hw/virtio/virtio-mem.c | 4 +- include/hw/qdev-core.h | 5 ++ include/qemu/osdep.h | 18 +- system/vl.c | 9 +++ util/oslib-posix.c | 131 +++-- util/oslib-win32.c | 8 ++- 7 files changed, 145 insertions(+), 37 deletions(-) diff --git a/backends/hostmem.c b/backends/hostmem.c index 30f69b2cb5..17221e422a 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -20,6 +20,7 @@ #include "qom/object_interfaces.h" #include "qemu/mmap-alloc.h" #include "qemu/madvise.h" +#include "hw/qdev-core.h" #ifdef CONFIG_NUMA #include <numaif.h> @@ -237,7 +238,7 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value, uint64_t sz = memory_region_size(&backend->mr); if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads, - backend->prealloc_context, errp)) { + backend->prealloc_context, false, errp)) { return; } backend->prealloc = true; @@ -323,6 +324,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); void *ptr; uint64_t sz; +bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED); if (!bc->alloc) { return; @@ -398,7 +400,8 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) if (backend->prealloc && !qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz, backend->prealloc_threads, -backend->prealloc_context, errp)) { +backend->prealloc_context, +async, errp)) { return; } } diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 99ab989852..ffd119ebac 100644 --- 
a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -605,7 +605,7 @@ static int virtio_mem_set_block_state(VirtIOMEM *vmem, uint64_t start_gpa, int fd = memory_region_get_fd(&vmem->memdev->mr); Error *local_err = NULL; -if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) { +if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) { static bool warned; /* @@ -1248,7 +1248,7 @@ static int virtio_mem_prealloc_range_cb(VirtIOMEM *vmem, void *arg, int fd = memory_region_get_fd(&vmem->memdev->mr); Error *local_err = NULL; -if (!qemu_prealloc_mem(fd, area, size, 1, NULL, &local_err)) { +if (!qemu_prealloc_mem(fd, area, size, 1, NULL, false, &local_err)) { error_report_err(local_err); return -ENOMEM; } diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 151d968238..83dd9e2485 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -1071,6 +1071,11 @@ typedef enum MachineInitPhase { */ PHASE_ACCEL_CREATED, +/* + * Late backend objects have been created and initialized. + */ +PHASE_LATE_BACKENDS_CREATED, + /* * machine_class->init has been called, thus creating any embedded * devices and validating machine properties. Devices created at diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index c9692cc314..7d359dabc4 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -680,6 +680,8 @@ typedef struct ThreadContext ThreadContext; * @area: start address of the are to preallocate * @sz: the size of the area to preallocate * @max_threads: maximum number of threads to use + * @tc: prealloc context threads pointer, NULL if not in use + * @async: request asynchronous preallocation, requires @tc * @errp: returns an error if this function fails * * Preallocate memory (populate/prefault page tables writable) for the virtual @@ -687,10 +689,24 @@ typedef struct ThreadContext ThreadContext; * each page in the area was faulted in writable at least once, for example, * after allocating file blocks for mapped files. 
* + * When setting @async, allocation might be performed asynchronously. + * qemu_finish_async_prealloc_mem() must be called to finish any asynchronous + * preallocation. + * * Return: true on success, else false setting @errp with error. */ bool qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads, - ThreadContext *tc, Error **errp); +