Depending on the number of NUMA nodes passed in CMOS RAM (offset 0x3e),
the BIOS code generates an SRAT (System Resource Affinity Table) that
describes which (V)CPUs and which parts of memory are assigned to each
node. The guest OS can then read this table and, hopefully, honor it.
Signed-off-by: Andre Przywara <[EMAIL PROTECTED]>
--
Andre Przywara
AMD-Operating System Research Center (OSRC), Dresden, Germany
Tel: +49 351 277-84917
----to satisfy European Law for business letters:
AMD Saxony Limited Liability Company & Co. KG,
Wilschdorfer Landstr. 101, 01109 Dresden, Germany
Register Court Dresden: HRA 4896, General Partner authorized
to represent: AMD Saxony LLC (Wilmington, Delaware, US)
General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
diff --git a/bios/rombios32.c b/bios/rombios32.c
index 3c9a2d7..c0bf08f 100755
--- a/bios/rombios32.c
+++ b/bios/rombios32.c
@@ -1232,7 +1232,7 @@ struct rsdp_descriptor /* Root System Descriptor Pointer */
struct rsdt_descriptor_rev1
{
ACPI_TABLE_HEADER_DEF /* ACPI common table header */
- uint32_t table_offset_entry [2]; /* Array of pointers to other */
+ uint32_t table_offset_entry [3]; /* Array of pointers to other */
/* ACPI tables */
};
@@ -1350,6 +1350,9 @@ struct multiple_apic_table
#define APIC_XRUPT_SOURCE 8
#define APIC_RESERVED 9 /* 9 and greater are reserved */
+#define SRAT_PROCESSOR 0
+#define SRAT_MEMORY 1
+
/*
* MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE)
*/
@@ -1357,6 +1360,40 @@ struct multiple_apic_table
uint8_t type; \
uint8_t length;
+/*
+ * SRAT (NUMA topology description) table
+ */
+struct system_resource_affinity_table /* fixed SRAT header; affinity entries are written right after it */
+{
+ ACPI_TABLE_HEADER_DEF /* ACPI common table header */
+ uint32_t reserved1; /* acpi_bios_init() sets this to 1 */
+ uint32_t reserved2[2]; /* left zero by the memset in acpi_bios_init() */
+};
+
+struct srat_processor_affinity /* one SRAT entry per CPU; filled in by acpi_bios_init() */
+{
+APIC_HEADER_DEF /* type (SRAT_PROCESSOR) and length bytes */
+ uint8_t proximity_lo; /* node of this CPU: acpi_bios_init() stores i % numanodes */
+ uint8_t local_apic_id; /* acpi_bios_init() stores the CPU index i */
+ uint32_t flags; /* written as cpu_to_le32(1) for every CPU entry */
+ uint8_t local_sapic_eid; /* always written as 0 */
+ uint8_t proximity_hi[3]; /* always cleared to 0 */
+ uint32_t reserved; /* stays 0 from the table-wide memset */
+};
+
+struct srat_memory_affinity /* one SRAT entry per memory range; filled in by acpi_bios_init() */
+{
+ APIC_HEADER_DEF /* type (SRAT_MEMORY) and length bytes */
+ uint8_t proximity[4]; /* byte 0 holds the node number, bytes 1-3 are cleared */
+ uint16_t reserved1; /* stays 0 from the table-wide memset */
+ uint32_t base_addr_low,base_addr_high; /* low/high 32 bits of the 64-bit range base */
+ uint32_t length_low,length_high; /* low/high 32 bits of the 64-bit range length */
+ uint32_t reserved2; /* stays 0 from the table-wide memset */
+ uint32_t flags; /* cpu_to_le32(1) for populated ranges, 0 for unused padding slots */
+ uint32_t reserved3[2]; /* stays 0 from the table-wide memset */
+};
+
+
/* Sub-structures for MADT */
struct madt_processor_apic
@@ -1443,10 +1480,12 @@ void acpi_bios_init(void)
struct fadt_descriptor_rev1 *fadt;
struct facs_descriptor_rev1 *facs;
struct multiple_apic_table *madt;
+ struct system_resource_affinity_table *srat;
uint8_t *dsdt;
uint32_t base_addr, rsdt_addr, fadt_addr, addr, facs_addr, dsdt_addr;
uint32_t acpi_tables_size, madt_addr, madt_size;
- int i;
+ uint32_t srat_addr, srat_size;
+ int i, numanodes;
/* reserve memory space for tables */
#ifdef BX_USE_EBDA_TABLES
@@ -1478,6 +1517,21 @@ void acpi_bios_init(void)
dsdt = (void *)(addr);
addr += sizeof(AmlCode);
+ numanodes = cmos_readb (0x3E);
+ if (numanodes > 0) {
+ addr = (addr + 7) & ~7;
+ srat_addr = addr;
+ srat_size = sizeof(*srat) +
+ sizeof(struct srat_processor_affinity) * smp_cpus +
+ sizeof(struct srat_memory_affinity) * (numanodes + 2);
+ srat = (void *)(addr);
+ addr += srat_size;
+ } else {
+ srat_addr = addr;
+ srat = (void*)(addr);
+ srat_size = 0;
+ }
+
addr = (addr + 7) & ~7;
madt_addr = addr;
madt_size = sizeof(*madt) +
@@ -1507,8 +1561,10 @@ void acpi_bios_init(void)
memset(rsdt, 0, sizeof(*rsdt));
rsdt->table_offset_entry[0] = cpu_to_le32(fadt_addr);
rsdt->table_offset_entry[1] = cpu_to_le32(madt_addr);
- acpi_build_table_header((struct acpi_table_header *)rsdt,
- "RSDT", sizeof(*rsdt), 1);
+ if (numanodes > 0)
+ rsdt->table_offset_entry[2] = cpu_to_le32(srat_addr);
+ acpi_build_table_header((struct acpi_table_header *)rsdt, "RSDT",
+ sizeof(*rsdt) - (numanodes > 0? 0: sizeof(uint32_t)), 1);
/* FADT */
memset(fadt, 0, sizeof(*fadt));
@@ -1590,6 +1646,92 @@ void acpi_bios_init(void)
acpi_build_table_header((struct acpi_table_header *)madt,
"APIC", madt_size, 1);
}
+
+ /* SRAT */
+ if (numanodes > 0) {
+ struct srat_processor_affinity *core;
+ struct srat_memory_affinity *numamem;
+ int nodenr = 0, slots;
+ unsigned long numa_chunk_size;
+ uint64_t mem_len, mem_base, next_base = 0;
+
+ if (ram_end == ram_size) {
+ numa_chunk_size = (ram_size / numanodes) >> 20;
+ } else {
+ numa_chunk_size = (1ULL << 32) - ram_size;
+ numa_chunk_size = (ram_end - numa_chunk_size) >> 20;
+ numa_chunk_size /= numanodes;
+ }
+
+ memset (srat, 0 , srat_size);
+ srat->reserved1=1;
+ core = (void*)(srat + 1);
+ for (i = 0; i < smp_cpus; ++i) {
+ core->type = SRAT_PROCESSOR;
+ core->length = sizeof(*core);
+ core->local_apic_id = i;
+ core->proximity_lo = i % numanodes;
+ memset (core->proximity_hi, 0, 3);
+ core->local_sapic_eid = 0;
+ if (i < smp_cpus)
+ core->flags = cpu_to_le32(1);
+ else
+ core->flags = 0;
+ core++;
+ }
+ numamem = (void*)core; slots = 0;
+ for (i = 0; i < numanodes + 1; ++i) {
+ numamem->type = SRAT_MEMORY;
+ numamem->length = sizeof(*numamem);
+ memset (numamem->proximity, 0 ,4);
+ numamem->proximity[0] = nodenr;
+ mem_base = next_base;
+ mem_len = (uint64_t)numa_chunk_size << 20;
+ if (i == 1) mem_len -= 1024 * 1024;
+ if (i == 0) {
+ mem_len = 640 * 1024;
+ next_base = 1024 * 1024;
+ } else next_base = mem_base + mem_len;
+
+ numamem->flags = cpu_to_le32(1);
+ numamem->base_addr_low = mem_base & 0xFFFFFFFF;
+ numamem->base_addr_high = mem_base >> 32;
+
+ /* Cut out the PCI hole */
+
+ if (mem_base <= ram_size && next_base > ram_size && i > 0) {
+ mem_len -= next_base - ram_size;
+ if (mem_len > 0) {
+ numamem->length_low = mem_len & 0xFFFFFFFF;
+ numamem->length_high = mem_len >> 32;
+ numamem++; slots++;
+ numamem->type = SRAT_MEMORY;
+ numamem->length = sizeof(*numamem);
+ memset (numamem->proximity, 0 ,4);
+ numamem->proximity[0] = nodenr;
+ }
+ numamem->base_addr_low = 0;
+ numamem->base_addr_high = 1;
+ numamem->flags = cpu_to_le32(1);
+ mem_len = next_base - ram_size;
+ next_base += (1ULL << 32) - ram_size;
+ }
+ numamem->length_low = mem_len & 0xFFFFFFFF;
+ numamem->length_high = mem_len >> 32;
+ numamem++; slots++;
+ if (i != 0) nodenr++;
+ }
+ for (; slots < numanodes + 2; slots++) {
+ numamem->type = SRAT_MEMORY;
+ numamem->length = sizeof(*numamem);
+ memset (numamem->proximity, 0 ,4);
+ numamem->flags = 0;
+ numamem++;
+ }
+
+ acpi_build_table_header((struct acpi_table_header *)srat,
+ "SRAT", srat_size, 1);
+ }
}
/* SMBIOS entry point -- must be written to a 16-bit aligned address
diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c
index 7d296c4..e5c8b6e 100644
--- a/qemu/hw/pc.c
+++ b/qemu/hw/pc.c
@@ -268,6 +268,8 @@ static void cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
rtc_set_memory(s, 0x34, val);
rtc_set_memory(s, 0x35, val >> 8);
+ rtc_set_memory(s, 0x3e, numnumanodes);
+
/* set the number of CPU */
rtc_set_memory(s, 0x5f, smp_cpus - 1);