On Fri, Jun 05, 2026 at 06:46:07PM +0800, fanhuang wrote:
> Restructure the device_memory SRAT umbrella entry into a per-kind
> partition: each TYPE_SP_MEM device gets an ENABLED entry at its own
> proximity_domain; the remaining sub-ranges get HOTPLUGGABLE | ENABLED
> placeholders at the highest PXM, preserving the existing umbrella
> convention.
>
> Signed-off-by: FangSheng Huang <[email protected]>
> ---
> hw/i386/acpi-build.c | 98 +++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 92 insertions(+), 6 deletions(-)
>
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 0d7c83d5e9..76efa83808 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -52,6 +52,7 @@
> #include "migration/vmstate.h"
> #include "hw/mem/memory-device.h"
> #include "hw/mem/nvdimm.h"
> +#include "hw/mem/sp-mem.h"
> #include "system/numa.h"
> #include "system/reset.h"
> #include "hw/hyperv/vmbus-bridge.h"
> @@ -1346,6 +1347,96 @@ build_tpm_tcpa(GArray *table_data, BIOSLinker *linker,
> GArray *tcpalog,
> }
> #endif
>
> +typedef struct {
> + uint64_t addr;
> + uint64_t size;
> + uint32_t node;
> +} SpMemRange;
> +
> +static int collect_sp_mem_ranges_cb(Object *obj, void *opaque)
> +{
> + GArray *ranges = opaque;
> + SpMemDevice *spm;
> + MemoryDeviceClass *mdc;
> + SpMemRange r;
> +
> + if (!object_dynamic_cast(obj, TYPE_SP_MEM)) {
> + return 0;
> + }
> + spm = SP_MEM(obj);
> + mdc = MEMORY_DEVICE_GET_CLASS(MEMORY_DEVICE(spm));
> + r.addr = mdc->get_addr(MEMORY_DEVICE(spm));
> + r.size = memory_region_size(
> + host_memory_backend_get_memory(spm->hostmem));
> + r.node = spm->node;
> + g_array_append_val(ranges, r);
> + return 0;
> +}
> +
> +static gint sp_mem_range_compare(gconstpointer a, gconstpointer b)
> +{
> + const SpMemRange *range_a = a;
> + const SpMemRange *range_b = b;
> +
> + if (range_a->addr < range_b->addr) {
> + return -1;
> + }
> + if (range_a->addr > range_b->addr) {
> + return 1;
> + }
> + return 0;
> +}
> +
> +/*
> + * Emit SRAT memory-affinity entries covering the device_memory region.
> + *
> + * For each plugged TYPE_SP_MEM device, emit an ENABLED entry at the
> + * device's own proximity_domain. All remaining sub-ranges (gaps
> + * between sp-mem devices, leading and trailing padding, and ranges
> + * occupied by other memory devices) are covered by HOTPLUGGABLE |
> + * ENABLED placeholder entries at PXM = nb_numa_nodes - 1.
> + */
> +static void build_srat_device_memory(GArray *table_data, MachineState *ms)
> +{
> + g_autoptr(GArray) ranges = g_array_new(FALSE, TRUE, sizeof(SpMemRange));
> + uint64_t cursor, end;
> + int nb_nodes = ms->numa_state ? ms->numa_state->num_nodes : 0;
> + uint32_t hotplug_pxm = nb_nodes > 0 ? nb_nodes - 1 : 0;
> + guint i;
> +
> + if (!ms->device_memory) {
> + return;
> + }
> +
> + cursor = ms->device_memory->base;
> + end = cursor + memory_region_size(&ms->device_memory->mr);
> +
> + object_child_foreach_recursive(qdev_get_machine(),
> + collect_sp_mem_ranges_cb, ranges);
> + g_array_sort(ranges, sp_mem_range_compare);
> +
> + for (i = 0; i < ranges->len; i++) {
> + SpMemRange *r = &g_array_index(ranges, SpMemRange, i);
> +
> + if (cursor < r->addr) {
> + build_srat_memory(table_data, cursor, r->addr - cursor,
> + hotplug_pxm,
> + MEM_AFFINITY_HOTPLUGGABLE |
> + MEM_AFFINITY_ENABLED);
> + }
> + build_srat_memory(table_data, r->addr, r->size, r->node,
> + MEM_AFFINITY_ENABLED);
> + cursor = r->addr + r->size;
> + }
> +
Should we expect (cursor == end) here and fire a warning if there wasn't
a full range to cover the region? If not - why set it all as
hotpluggable anyway?
(Maybe just worth a comment here what the last block is actually doing)
> + if (cursor < end) {
> + build_srat_memory(table_data, cursor, end - cursor,
> + hotplug_pxm,
> + MEM_AFFINITY_HOTPLUGGABLE |
> + MEM_AFFINITY_ENABLED);
> + }
> +}
> +
> #define HOLE_640K_START (640 * KiB)
> #define HOLE_640K_END (1 * MiB)
>
> @@ -1481,12 +1572,7 @@ build_srat(GArray *table_data, BIOSLinker *linker,
> MachineState *machine)
> * Memory devices may override proximity set by this entry,
> * providing _PXM method if necessary.
> */
> - if (machine->device_memory) {
> - build_srat_memory(table_data, machine->device_memory->base,
> - memory_region_size(&machine->device_memory->mr),
> - nb_numa_nodes - 1,
> - MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED);
> - }
> + build_srat_device_memory(table_data, machine);
>
> acpi_table_end(linker, &table);
> }
> --
> 2.34.1
>