Restructure the device_memory SRAT umbrella entry into a per-kind partition: each TYPE_SP_MEM device gets an ENABLED entry at its own proximity_domain; the remaining sub-ranges get HOTPLUGGABLE | ENABLED placeholders at the highest PXM, preserving the existing umbrella convention.
Signed-off-by: FangSheng Huang <[email protected]> --- hw/i386/acpi-build.c | 96 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 4 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 0d7c83d5e9..db144b5706 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -52,6 +52,7 @@ #include "migration/vmstate.h" #include "hw/mem/memory-device.h" #include "hw/mem/nvdimm.h" +#include "hw/mem/sp-mem.h" #include "system/numa.h" #include "system/reset.h" #include "hw/hyperv/vmbus-bridge.h" @@ -1346,6 +1347,96 @@ build_tpm_tcpa(GArray *table_data, BIOSLinker *linker, GArray *tcpalog, } #endif +typedef struct { + uint64_t addr; + uint64_t size; + uint32_t node; +} SpMemRange; + +static int sp_mem_collect_ranges_cb(Object *obj, void *opaque) +{ + GArray *ranges = opaque; + SpMemDevice *spm; + MemoryDeviceClass *mdc; + SpMemRange r; + + if (!object_dynamic_cast(obj, TYPE_SP_MEM)) { + return 0; + } + spm = SP_MEM(obj); + mdc = MEMORY_DEVICE_GET_CLASS(MEMORY_DEVICE(spm)); + r.addr = mdc->get_addr(MEMORY_DEVICE(spm)); + r.size = memory_region_size( + host_memory_backend_get_memory(spm->hostmem)); + r.node = spm->node; + g_array_append_val(ranges, r); + return 0; +} + +static gint sp_mem_range_compare(gconstpointer a, gconstpointer b) +{ + const SpMemRange *range_a = a; + const SpMemRange *range_b = b; + + if (range_a->addr < range_b->addr) { + return -1; + } + if (range_a->addr > range_b->addr) { + return 1; + } + return 0; +} + +/* + * Emit SRAT memory-affinity entries covering the device_memory region. + * + * For each plugged TYPE_SP_MEM device, emit an ENABLED entry at the + * device's own proximity_domain. All remaining sub-ranges (gaps + * between sp-mem devices, leading and trailing padding, and ranges + * occupied by other memory devices) are covered by HOTPLUGGABLE | + * ENABLED placeholder entries at PXM = nb_numa_nodes - 1. + */ +static void build_srat_device_memory(GArray *table_data, MachineState *ms) +{ + g_autoptr(GArray) ranges = g_array_new(FALSE, TRUE, sizeof(SpMemRange)); + uint32_t hotplug_pxm = ms->numa_state->num_nodes - 1; + uint64_t region_start, region_end; + guint i; + + region_start = ms->device_memory->base; + region_end = region_start + memory_region_size(&ms->device_memory->mr); + + object_child_foreach_recursive(qdev_get_machine(), + sp_mem_collect_ranges_cb, ranges); + g_array_sort(ranges, sp_mem_range_compare); + + for (i = 0; i < ranges->len; i++) { + SpMemRange *r = &g_array_index(ranges, SpMemRange, i); + + if (region_start < r->addr) { + build_srat_memory(table_data, region_start, r->addr - region_start, + hotplug_pxm, + MEM_AFFINITY_HOTPLUGGABLE | + MEM_AFFINITY_ENABLED); + } + build_srat_memory(table_data, r->addr, r->size, r->node, + MEM_AFFINITY_ENABLED); + region_start = r->addr + r->size; + } + + /* + * Cover the rest of the device_memory window that no sp-mem device + * occupies. Keeping it HOTPLUGGABLE preserves the umbrella entry's + * role for future pc-dimm / virtio-mem hot-add into this window. + */ + if (region_start < region_end) { + build_srat_memory(table_data, region_start, region_end - region_start, + hotplug_pxm, + MEM_AFFINITY_HOTPLUGGABLE | + MEM_AFFINITY_ENABLED); + } +} + #define HOLE_640K_START (640 * KiB) #define HOLE_640K_END (1 * MiB) @@ -1482,10 +1573,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) * providing _PXM method if necessary. */ if (machine->device_memory) { - build_srat_memory(table_data, machine->device_memory->base, - memory_region_size(&machine->device_memory->mr), - nb_numa_nodes - 1, - MEM_AFFINITY_HOTPLUGGABLE | MEM_AFFINITY_ENABLED); + build_srat_device_memory(table_data, machine); } acpi_table_end(linker, &table); -- 2.34.1
