Author: markj
Date: Mon Oct 22 20:13:51 2018
New Revision: 339616
URL: https://svnweb.freebsd.org/changeset/base/339616

Log:
  Make it possible to disable NUMA support with a tunable.
  
  This provides a chicken switch for anyone negatively impacted by
  enabling NUMA in the amd64 GENERIC kernel configuration.  With
  NUMA disabled at boot-time, information about the NUMA topology
  is not exposed to the rest of the kernel, and all of physical
  memory is viewed as coming from a single domain.
  
  This method still has some performance overhead relative to disabling
  NUMA support at compile time.
  
  PR:           231460
  Reviewed by:  alc, gallatin, kib
  MFC after:    1 week
  Sponsored by: The FreeBSD Foundation
  Differential Revision:        https://reviews.freebsd.org/D17439

Modified:
  head/share/man/man4/numa.4
  head/sys/arm64/arm64/mp_machdep.c
  head/sys/kern/kern_cpuset.c
  head/sys/vm/vm_phys.c
  head/sys/x86/acpica/srat.c

Modified: head/share/man/man4/numa.4
==============================================================================
--- head/share/man/man4/numa.4  Mon Oct 22 20:00:43 2018        (r339615)
+++ head/share/man/man4/numa.4  Mon Oct 22 20:13:51 2018        (r339616)
@@ -24,18 +24,16 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd July 10, 2018
+.Dd October 22, 2018
 .Dt NUMA 4
 .Os
 .Sh NAME
 .Nm NUMA
 .Nd Non-Uniform Memory Access
 .Sh SYNOPSIS
-.Cd options SMP
-.Cd options MAXMEMDOM=16
+.Cd options MAXMEMDOM
+.Cd options NUMA
 .Pp
-.In sys/cpuset.h
-.In sys/bus.h
 .Sh DESCRIPTION
 Non-Uniform Memory Access is a computer architecture design which
 involves unequal costs between processors, memory and IO devices
@@ -47,14 +45,26 @@ architecture, the latency to access specific memory or
 depends upon which processor the memory or device is attached to.
 Accessing memory local to a processor is faster than accessing memory
 that is connected to one of the other processors.
+.Fx
+implements NUMA-aware memory allocation policies.
+By default it attempts to ensure that allocations are balanced across
+each domain.
+Users may override the default domain selection policy using
+.Xr cpuset 1 .
 .Pp
 .Nm
-is enabled when the
+support is enabled when the
 .Cd NUMA
-option is used in a kernel configuration
-file and the
+option is specified in the kernel configuration file.
+Each platform defines the
 .Cd MAXMEMDOM
-option is set to a value greater than 1.
+constant, which specifies the maximum number of supported NUMA domains.
+This constant may be specified in the kernel configuration file.
+.Nm
+support can be disabled at boot time by setting the
+.Va vm.numa.disabled
+tunable to 1.
+Other values for this tunable are currently ignored.
 .Pp
 Thread and process
 .Nm
@@ -128,7 +138,7 @@ tool first appeared in
 .Fx 11.0
 and were removed in
 .Fx 12.0 .
-Current implementation appeared in
+The current implementation appeared in
 .Fx 12.0 .
 .Pp
 .Sh AUTHORS

Modified: head/sys/arm64/arm64/mp_machdep.c
==============================================================================
--- head/sys/arm64/arm64/mp_machdep.c   Mon Oct 22 20:00:43 2018        (r339615)
+++ head/sys/arm64/arm64/mp_machdep.c   Mon Oct 22 20:13:51 2018        (r339616)
@@ -576,11 +576,12 @@ cpu_init_fdt(u_int id, phandle_t node, u_int addr_size
                return (FALSE);
 
        /* Try to read the numa node of this cpu */
-       if (OF_getencprop(node, "numa-node-id", &domain, sizeof(domain)) > 0) {
-               __pcpu[id].pc_domain = domain;
-               if (domain < MAXMEMDOM)
-                       CPU_SET(id, &cpuset_domain[domain]);
-       }
+       if (vm_ndomains == 1 ||
+           OF_getencprop(node, "numa-node-id", &domain, sizeof(domain)) <= 0)
+               domain = 0;
+       __pcpu[id].pc_domain = domain;
+       if (domain < MAXMEMDOM)
+               CPU_SET(id, &cpuset_domain[domain]);
 
        return (TRUE);
 }

Modified: head/sys/kern/kern_cpuset.c
==============================================================================
--- head/sys/kern/kern_cpuset.c Mon Oct 22 20:00:43 2018        (r339615)
+++ head/sys/kern/kern_cpuset.c Mon Oct 22 20:13:51 2018        (r339616)
@@ -458,6 +458,12 @@ _domainset_create(struct domainset *domain, struct dom
        struct domainset *ndomain;
        int i, j, max;
 
+       KASSERT(domain->ds_cnt <= vm_ndomains,
+           ("invalid domain count in domainset %p", domain));
+       KASSERT(domain->ds_policy != DOMAINSET_POLICY_PREFER ||
+           domain->ds_prefer < vm_ndomains,
+           ("invalid preferred domain in domains %p", domain));
+
        mtx_lock_spin(&cpuset_lock);
        LIST_FOREACH(ndomain, &cpuset_domains, ds_link)
                if (domainset_equal(ndomain, domain))

Modified: head/sys/vm/vm_phys.c
==============================================================================
--- head/sys/vm/vm_phys.c       Mon Oct 22 20:00:43 2018        (r339615)
+++ head/sys/vm/vm_phys.c       Mon Oct 22 20:13:51 2018        (r339616)
@@ -597,11 +597,22 @@ vm_phys_register_domains(int ndomains, struct mem_affi
     int *locality)
 {
 #ifdef NUMA
-       int i;
+       int d, i;
 
-       vm_ndomains = ndomains;
-       mem_affinity = affinity;
-       mem_locality = locality;
+       /*
+        * For now the only override value that we support is 1, which
+        * effectively disables NUMA-awareness in the allocators.
+        */
+       d = 0;
+       TUNABLE_INT_FETCH("vm.numa.disabled", &d);
+       if (d)
+               ndomains = 1;
+
+       if (ndomains > 1) {
+               vm_ndomains = ndomains;
+               mem_affinity = affinity;
+               mem_locality = locality;
+       }
 
        for (i = 0; i < vm_ndomains; i++)
                DOMAINSET_SET(i, &all_domains);

Modified: head/sys/x86/acpica/srat.c
==============================================================================
--- head/sys/x86/acpica/srat.c  Mon Oct 22 20:00:43 2018        (r339615)
+++ head/sys/x86/acpica/srat.c  Mon Oct 22 20:13:51 2018        (r339616)
@@ -535,11 +535,7 @@ srat_set_cpus(void *dummy)
                if (!cpu->enabled)
                        panic("SRAT: CPU with APIC ID %u is not known",
                            pc->pc_apic_id);
-#ifdef NUMA
-               pc->pc_domain = cpu->domain;
-#else
-               pc->pc_domain = 0;
-#endif
+               pc->pc_domain = vm_ndomains > 1 ? cpu->domain : 0;
                CPU_SET(i, &cpuset_domain[pc->pc_domain]);
                if (bootverbose)
                        printf("SRAT: CPU %u has memory domain %d\n", i,
@@ -564,7 +560,7 @@ acpi_map_pxm_to_vm_domainid(int pxm)
 
        for (i = 0; i < ndomain; i++) {
                if (domain_pxm[i] == pxm)
-                       return (i);
+                       return (vm_ndomains > 1 ? i : 0);
        }
 
        return (-1);
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to