Module Name: src Committed By: cegger Date: Wed Nov 25 13:17:06 UTC 2009
Modified Files: src/sys/dev/acpi: files.acpi Added Files: src/sys/dev/acpi: acpi_srat.c acpi_srat.h Log Message: Add ACPI SRAT parser. This is a part of NUMA support. Tested on 1-node, 2-node and 8-node machines. Patch presented on tech-kern@, port-i386@ and port-amd64@. No comments. To generate a diff of this commit: cvs rdiff -u -r0 -r1.1 src/sys/dev/acpi/acpi_srat.c \ src/sys/dev/acpi/acpi_srat.h cvs rdiff -u -r1.60 -r1.61 src/sys/dev/acpi/files.acpi Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/dev/acpi/files.acpi diff -u src/sys/dev/acpi/files.acpi:1.60 src/sys/dev/acpi/files.acpi:1.61 --- src/sys/dev/acpi/files.acpi:1.60 Fri Oct 2 16:47:52 2009 +++ src/sys/dev/acpi/files.acpi Wed Nov 25 13:17:06 2009 @@ -1,4 +1,4 @@ -# $NetBSD: files.acpi,v 1.60 2009/10/02 16:47:52 jmcneill Exp $ +# $NetBSD: files.acpi,v 1.61 2009/11/25 13:17:06 cegger Exp $ include "dev/acpi/acpica/files.acpica" @@ -20,6 +20,7 @@ file dev/acpi/acpi_quirks.c acpi file dev/acpi/acpi_timer.c acpi file dev/acpi/acpi_wakedev.c acpi +file dev/acpi/acpi_srat.c acpi # ACPI/apm emulation. attach apm at acpiapmbus with acpiapm: sysmon_envsys Added files: Index: src/sys/dev/acpi/acpi_srat.c diff -u /dev/null src/sys/dev/acpi/acpi_srat.c:1.1 --- /dev/null Wed Nov 25 13:17:06 2009 +++ src/sys/dev/acpi/acpi_srat.c Wed Nov 25 13:17:06 2009 @@ -0,0 +1,513 @@ +/* $NetBSD $ */ + +/* + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Christoph Egger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__KERNEL_RCSID(0, "$NetBSD $"); + +#include <sys/param.h> +#include <sys/systm.h> + +#include <sys/kmem.h> + +#include <dev/acpi/acpica.h> +#include <dev/acpi/acpivar.h> +#include <dev/acpi/acpi_srat.h> + +static ACPI_TABLE_SRAT *srat; + +struct acpisrat_node { + acpisrat_nodeid_t nodeid; + uint32_t ncpus; /* Number of cpus in this node */ + struct acpisrat_cpu **cpu; /* Array of cpus */ + uint32_t nmems; /* Number of memory ranges in this node */ + struct acpisrat_mem **mem; /* Array of memory ranges */ +}; + +static uint32_t nnodes; /* Number of NUMA nodes */ +static struct acpisrat_node *node_array; /* Array of NUMA nodes */ +static uint32_t ncpus; /* Number of CPUs */ +static struct acpisrat_cpu *cpu_array; /* Array of cpus */ +static uint32_t nmems; /* Number of Memory ranges */ +static struct acpisrat_mem *mem_array; + + +struct cpulist { + struct acpisrat_cpu cpu; + TAILQ_ENTRY(cpulist) entry; +}; + +static TAILQ_HEAD(, cpulist) cpulisthead; + +#define CPU_INIT TAILQ_INIT(&cpulisthead); +#define CPU_FOREACH(cpu) TAILQ_FOREACH(cpu, &cpulisthead, entry) +#define CPU_ADD(cpu) TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry) +#define CPU_REM(cpu) TAILQ_REMOVE(&cpulisthead, cpu, entry) +#define CPU_FIRST TAILQ_FIRST(&cpulisthead) + + +struct memlist { + struct acpisrat_mem mem; + TAILQ_ENTRY(memlist) entry; +}; + +static TAILQ_HEAD(, memlist) memlisthead; + +#define MEM_INIT TAILQ_INIT(&memlisthead) +#define 
MEM_FOREACH(mem) TAILQ_FOREACH(mem, &memlisthead, entry) +#define MEM_ADD(mem) TAILQ_INSERT_TAIL(&memlisthead, mem, entry) +#define MEM_ADD_BEFORE(mem, b) TAILQ_INSERT_BEFORE(b, mem, entry) +#define MEM_REM(mem) TAILQ_REMOVE(&memlisthead, mem, entry) +#define MEM_FIRST TAILQ_FIRST(&memlisthead) + + +static struct cpulist * +cpu_alloc(void) +{ + return kmem_zalloc(sizeof(struct cpulist), KM_NOSLEEP); +} + +static void +cpu_free(struct cpulist *c) +{ + kmem_free(c, sizeof(struct cpulist)); +} + +#if 0 +static struct cpulist * +cpu_get(acpisrat_nodeid_t nodeid) +{ + struct cpulist *tmp; + + CPU_FOREACH(tmp) { + if (tmp->cpu.nodeid == nodeid) + return tmp; + } + + return NULL; +} +#endif + +static struct memlist * +mem_alloc(void) +{ + return kmem_zalloc(sizeof(struct memlist), KM_NOSLEEP); +} + +static void +mem_free(struct memlist *m) +{ + kmem_free(m, sizeof(struct memlist)); +} + +static struct memlist * +mem_get(acpisrat_nodeid_t nodeid) +{ + struct memlist *tmp; + + MEM_FOREACH(tmp) { + if (tmp->mem.nodeid == nodeid) + return tmp; + } + + return NULL; +} + + +bool +acpisrat_exist(void) +{ + ACPI_TABLE_HEADER *table; + ACPI_STATUS rv; + + rv = AcpiGetTable(ACPI_SIG_SRAT, 1, (ACPI_TABLE_HEADER **)&table); + if (ACPI_FAILURE(rv)) + return false; + + /* Check if header is valid */ + if (table == NULL) + return false; + + if (table->Length == 0xffffffff) + return false; + + srat = (ACPI_TABLE_SRAT *)table; + + return true; +} + +static int +acpisrat_parse(void) +{ + ACPI_SUBTABLE_HEADER *subtable; + ACPI_SRAT_CPU_AFFINITY *srat_cpu; + ACPI_SRAT_MEM_AFFINITY *srat_mem; + ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic; + + acpisrat_nodeid_t nodeid; + struct cpulist *cpuentry = NULL; + struct memlist *mementry; + uint32_t srat_pos; + bool ignore_cpu_affinity = false; + + KASSERT(srat != NULL); + + /* Content starts right after the header */ + srat_pos = sizeof(ACPI_TABLE_SRAT); + + while (srat_pos < srat->Header.Length) { + subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + 
srat_pos);
+
+		/*
+		 * The length comes from firmware, so check it before using
+		 * it: zero would keep srat_pos from ever advancing, anything
+		 * shorter than the subtable header would misalign the walk,
+		 * and anything longer than the rest of the table would run
+		 * past its end.  The loop condition guarantees that
+		 * srat_pos is below Header.Length, so the subtraction
+		 * cannot wrap.
+		 */
+		if (subtable->Length < sizeof(*subtable) ||
+		    subtable->Length > srat->Header.Length - srat_pos)
+			return EINVAL;
+		srat_pos += subtable->Length;
+
+		switch (subtable->Type) {
+		case ACPI_SRAT_TYPE_CPU_AFFINITY:
+			/* Skipped once an x2APIC entry has been seen. */
+			if (ignore_cpu_affinity)
+				continue;
+
+			srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable;
+			/*
+			 * Widen each byte before shifting; shifting the
+			 * promoted int left by 24 could overflow into the
+			 * sign bit.
+			 */
+			nodeid = ((uint32_t)srat_cpu->ProximityDomainHi[2] << 24) |
+			    ((uint32_t)srat_cpu->ProximityDomainHi[1] << 16) |
+			    ((uint32_t)srat_cpu->ProximityDomainHi[0] << 8) |
+			    (uint32_t)srat_cpu->ProximityDomainLo;
+
+			cpuentry = cpu_alloc();
+			if (cpuentry == NULL)
+				return ENOMEM;
+			CPU_ADD(cpuentry);
+
+			cpuentry->cpu.nodeid = nodeid;
+			cpuentry->cpu.apicid = srat_cpu->ApicId;
+			cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid;
+			cpuentry->cpu.flags = srat_cpu->Flags;
+			cpuentry->cpu.clockdomain = srat_cpu->ClockDomain;
+			break;
+
+		case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
+			srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable;
+			nodeid = srat_mem->ProximityDomain;
+
+			mementry = mem_alloc();
+			if (mementry == NULL)
+				return ENOMEM;
+			MEM_ADD(mementry);
+
+			mementry->mem.nodeid = nodeid;
+			mementry->mem.baseaddress = srat_mem->BaseAddress;
+			mementry->mem.length = srat_mem->Length;
+			mementry->mem.flags = srat_mem->Flags;
+			break;
+
+		case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
+			srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable;
+			nodeid = srat_x2apic->ProximityDomain;
+
+			/* This table entry overrides
+			 * ACPI_SRAT_TYPE_CPU_AFFINITY.
+ */ + if (!ignore_cpu_affinity) { + struct cpulist *citer; + while ((citer = CPU_FIRST) != NULL) { + CPU_REM(citer); + cpu_free(citer); + } + ignore_cpu_affinity = true; + } + + cpuentry = cpu_alloc(); + if (cpuentry == NULL) + return ENOMEM; + CPU_ADD(cpuentry); + + cpuentry->cpu.nodeid = nodeid; + cpuentry->cpu.apicid = srat_x2apic->ApicId; + cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain; + cpuentry->cpu.flags = srat_x2apic->Flags; + break; + + case ACPI_SRAT_TYPE_RESERVED: + printf("ACPI SRAT subtable reserved, length: 0x%x\n", + subtable->Length); + break; + } + } + + return 0; +} + +static int +acpisrat_quirks(void) +{ + struct cpulist *citer; + struct memlist *mem, *miter; + + /* Some sanity checks. */ + + /* Deal with holes in the memory nodes. + * BIOS doesn't enlist memory nodes which + * don't have any memory modules plugged in. + * This behaviour has been observed on AMD machines. + * + * Do that by searching for CPUs in NUMA nodes + * which don't exist in the memory and then insert + * a zero memory range for the missing node. 
+	 */
+	CPU_FOREACH(citer) {
+		mem = mem_get(citer->cpu.nodeid);
+		if (mem != NULL)
+			continue;
+		mem = mem_alloc();
+		if (mem == NULL)
+			return ENOMEM;
+		mem->mem.nodeid = citer->cpu.nodeid;
+		/* all other fields are already zero filled */
+
+		/* Insert ahead of the first entry whose id is not smaller. */
+		MEM_FOREACH(miter) {
+			if (miter->mem.nodeid < citer->cpu.nodeid)
+				continue;
+			MEM_ADD_BEFORE(mem, miter);
+			break;
+		}
+
+		/*
+		 * TAILQ_FOREACH leaves miter NULL when it runs off the end,
+		 * i.e. the list is empty or every entry has a smaller node
+		 * id.  Append in that case; otherwise the new entry would
+		 * never be linked in and would simply be lost.
+		 */
+		if (miter == NULL)
+			MEM_ADD(mem);
+	}
+
+	return 0;
+}
+
+/*
+ * Free every entry still queued on the temporary CPU and memory lists.
+ */
+static void
+acpisrat_drain_lists(void)
+{
+	struct cpulist *c;
+	struct memlist *m;
+
+	while ((c = CPU_FIRST) != NULL) {
+		CPU_REM(c);
+		cpu_free(c);
+	}
+
+	while ((m = MEM_FIRST) != NULL) {
+		MEM_REM(m);
+		mem_free(m);
+	}
+}
+
+/*
+ * Give up on a refresh after the old arrays are already gone: drop the
+ * temporary lists, free whatever part of the new arrays exists and reset
+ * the counters, then hand the error back for the caller to return.
+ */
+static int
+acpisrat_refresh_abort(int error)
+{
+	acpisrat_drain_lists();
+	(void)acpisrat_exit();
+	return error;
+}
+
+/*
+ * Locate the SRAT table and build the node arrays from it.
+ * Returns EEXIST when acpisrat_exist() reports no usable table,
+ * otherwise the result of acpisrat_refresh().
+ */
+int
+acpisrat_init(void)
+{
+	if (!acpisrat_exist())
+		return EEXIST;
+	return acpisrat_refresh();
+}
+
+/*
+ * Parse the SRAT table again and rebuild node_array, cpu_array and
+ * mem_array from it.  Returns 0 on success or an errno value.
+ */
+int
+acpisrat_refresh(void)
+{
+	int rc;
+	uint32_t i, j, k;
+	struct cpulist *citer;
+	struct memlist *miter;
+	uint32_t cnodes = 0, mnodes = 0;
+
+	CPU_INIT;
+	MEM_INIT;
+
+	rc = acpisrat_parse();
+	if (rc == 0)
+		rc = acpisrat_quirks();
+	if (rc != 0) {
+		/*
+		 * The arrays of an earlier refresh are still intact here;
+		 * only the half-filled temporary lists need to go.
+		 */
+		acpisrat_drain_lists();
+		return rc;
+	}
+
+	/* cleanup resources */
+	rc = acpisrat_exit();
+	if (rc != 0) {
+		acpisrat_drain_lists();
+		return rc;
+	}
+
+	nnodes = 0;
+	ncpus = 0;
+	CPU_FOREACH(citer) {
+		cnodes = MAX(citer->cpu.nodeid, cnodes);
+		ncpus++;
+	}
+
+	nmems = 0;
+	MEM_FOREACH(miter) {
+		mnodes = MAX(miter->mem.nodeid, mnodes);
+		nmems++;
+	}
+
+	/* Node ids index node_array directly, so size it by the largest id. */
+	nnodes = MAX(cnodes, mnodes) + 1;
+
+	node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node),
+	    KM_NOSLEEP);
+	if (node_array == NULL)
+		return acpisrat_refresh_abort(ENOMEM);
+
+	/* Never ask kmem for zero bytes; an empty array just stays NULL. */
+	if (ncpus != 0) {
+		cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu),
+		    KM_NOSLEEP);
+		if (cpu_array == NULL)
+			return acpisrat_refresh_abort(ENOMEM);
+	}
+
+	if (nmems != 0) {
+		mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem),
+		    KM_NOSLEEP);
+		if (mem_array == NULL)
+			return acpisrat_refresh_abort(ENOMEM);
+	}
+
+	/* Copy the list entries out and count the members of each node. */
+	i = 0;
+	CPU_FOREACH(citer) {
+		memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu));
+		i++;
+		node_array[citer->cpu.nodeid].ncpus++;
+	}
+
+	i = 0;
+	MEM_FOREACH(miter) {
+		memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem));
+		i++;
+		node_array[miter->mem.nodeid].nmems++;
+	}
+
+	for (i = 0; i < nnodes; i++) {
+		node_array[i].nodeid = i;
+
+		/* Nodes without members keep the NULL left by kmem_zalloc(). */
+		if (node_array[i].ncpus != 0) {
+			node_array[i].cpu = kmem_zalloc(node_array[i].ncpus *
+			    sizeof(struct acpisrat_cpu *), KM_NOSLEEP);
+			if (node_array[i].cpu == NULL)
+				return acpisrat_refresh_abort(ENOMEM);
+		}
+		if (node_array[i].nmems != 0)
+			node_array[i].mem = kmem_zalloc(node_array[i].nmems *
+			    sizeof(struct 
acpisrat_mem *), KM_NOSLEEP); + + k = 0; + for (j = 0; j < ncpus; j++) { + if (cpu_array[j].nodeid != i) + continue; + node_array[i].cpu[k] = &cpu_array[j]; + k++; + } + + k = 0; + for (j = 0; j < nmems; j++) { + if (mem_array[j].nodeid != i) + continue; + node_array[i].mem[k] = &mem_array[j]; + k++; + } + } + + while ((citer = CPU_FIRST) != NULL) { + CPU_REM(citer); + cpu_free(citer); + } + + while ((miter = MEM_FIRST) != NULL) { + MEM_REM(miter); + mem_free(miter); + } + + return 0; +} + + +int +acpisrat_exit(void) +{ + int i; + + if (node_array) { + for (i = 0; i < nnodes; i++) { + if (node_array[i].cpu) + kmem_free(node_array[i].cpu, + node_array[i].ncpus * sizeof(struct acpisrat_cpu *)); + if (node_array[i].mem) + kmem_free(node_array[i].mem, + node_array[i].nmems * sizeof(struct acpisrat_mem *)); + } + kmem_free(node_array, nnodes * sizeof(struct acpisrat_node)); + } + node_array = NULL; + + if (cpu_array) + kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu)); + cpu_array = NULL; + + if (mem_array) + kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem)); + mem_array = NULL; + + nnodes = 0; + ncpus = 0; + nmems = 0; + + return 0; +} + + +void +acpisrat_dump(void) +{ + uint32_t i, j, nn, nc, nm; + struct acpisrat_cpu c; + struct acpisrat_mem m; + + nn = acpisrat_nodes(); + aprint_debug("SRAT: %u NUMA nodes\n", nn); + for (i = 0; i < nn; i++) { + nc = acpisrat_node_cpus(i); + for (j = 0; j < nc; j++) { + acpisrat_cpu(i, j, &c); + aprint_debug("SRAT: node %u cpu %u " + "(apic %u, sapic %u, flags %u, clockdomain %u)\n", + c.nodeid, j, c.apicid, c.sapiceid, c.flags, + c.clockdomain); + } + + nm = acpisrat_node_memoryranges(i); + for (j = 0; j < nm; j++) { + acpisrat_mem(i, j, &m); + aprint_debug("SRAT: node %u memory range %u (0x%" + PRIx64" - 0x%"PRIx64" flags %u)\n", + m.nodeid, j, m.baseaddress, + m.baseaddress + m.length, m.flags); + } + } +} + +uint32_t +acpisrat_nodes(void) +{ + return nnodes; +} + +uint32_t +acpisrat_node_cpus(acpisrat_nodeid_t 
nodeid) +{ + return node_array[nodeid].ncpus; +} + +uint32_t +acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid) +{ + return node_array[nodeid].nmems; +} + +void +acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum, + struct acpisrat_cpu *c) +{ + memcpy(c, node_array[nodeid].cpu[cpunum], + sizeof(struct acpisrat_cpu)); +} + +void +acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange, + struct acpisrat_mem *mem) +{ + memcpy(mem, node_array[nodeid].mem[memrange], + sizeof(struct acpisrat_mem)); +} Index: src/sys/dev/acpi/acpi_srat.h diff -u /dev/null src/sys/dev/acpi/acpi_srat.h:1.1 --- /dev/null Wed Nov 25 13:17:06 2009 +++ src/sys/dev/acpi/acpi_srat.h Wed Nov 25 13:17:06 2009 @@ -0,0 +1,98 @@ +/* $NetBSD $ */ + +/* + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Christoph Egger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ACPI_SRAT_H
+#define ACPI_SRAT_H
+
+/* NUMA node id; the parser takes it from the SRAT proximity domain. */
+typedef uint32_t acpisrat_nodeid_t;
+
+/* One CPU affinity entry, as copied out by acpisrat_cpu(). */
+struct acpisrat_cpu {
+	acpisrat_nodeid_t nodeid;	/* node this CPU belongs to */
+	uint32_t apicid;		/* ApicId of the SRAT entry */
+	uint32_t sapiceid;		/* LocalSapicEid; 0 for x2APIC entries */
+	uint32_t flags;			/* Flags of the SRAT entry, unmodified */
+
+	/* clockdomain has a meaningful value when the ACPI MADT table has
+	 * ACPI_MADT_TYPE_LOCAL_X2APIC and/or ACPI_MADT_TYPE_LOCAL_X2APIC_NMI
+	 * entries, or when the ACPI CPU device has a _CDM object.
+	 */
+	uint32_t clockdomain;
+};
+
+/* One memory affinity entry, as copied out by acpisrat_mem(). */
+struct acpisrat_mem {
+	acpisrat_nodeid_t nodeid;	/* node this range belongs to */
+	uint64_t baseaddress;		/* BaseAddress of the SRAT entry */
+	uint64_t length;		/* Length of the SRAT entry */
+	uint32_t flags;			/* Flags of the SRAT entry, unmodified */
+};
+
+/* Returns true if the ACPI SRAT table is available; the table is then
+ * remembered for the parser.
+ *
+ * If the table does not exist, all functions below
+ * have undefined behaviour.
+ */
+bool acpisrat_exist(void);
+
+/* Initializes the parser. Must be the first function
+ * called once the table is known to be available.
+ * Returns EEXIST when acpisrat_exist() fails, otherwise
+ * the result of acpisrat_refresh().
+ */
+int acpisrat_init(void);
+
+/* Re-parse the ACPI SRAT table. Useful after
+ * hotplugging cpu or RAM.
+ * Returns 0 on success or an errno value.
+ */
+int acpisrat_refresh(void);
+
+/* Free allocated memory and reset all counts to zero.
+ * Should be called when acpisrat is no longer of any use.
+ * Returns 0.
+ */
+int acpisrat_exit(void);
+
+/* Print all nodes with their cpus and memory ranges
+ * through aprint_debug().
+ */
+void acpisrat_dump(void);
+
+/* Get number of NUMA nodes */
+uint32_t acpisrat_nodes(void);
+
+/* Get number of cpus in the node.
+ * 0 means this is a cpu-less node.
+ */
+uint32_t acpisrat_node_cpus(acpisrat_nodeid_t);
+
+/* Get number of memory ranges in the node.
+ * 0 means this node has no RAM.
+ */
+uint32_t acpisrat_node_memoryranges(acpisrat_nodeid_t);
+
+/* Retrieve cpu and memory info: the selected entry is
+ * copied into the caller's structure. The node id must be
+ * below acpisrat_nodes() and the second index below the
+ * matching per-node count.
+ */
+void acpisrat_cpu(acpisrat_nodeid_t, uint32_t cpunum, struct acpisrat_cpu *);
+void acpisrat_mem(acpisrat_nodeid_t, uint32_t memrange, struct acpisrat_mem *);
+
+#endif /* ACPI_SRAT_H */