Module Name: src Committed By: mrg Date: Sun Jan 12 09:29:18 UTC 2020
Modified Files: src/sys/arch/aarch64/aarch64: cpu.c cpufunc.c locore.S src/sys/arch/aarch64/include: cpu.h cpufunc.h src/sys/arch/arm/fdt: cpu_fdt.c src/sys/kern: subr_cpu.c Log Message: Provide some semblance of valid CPU topology for big.LITTLE systems. While attaching CPUs, if the FDT provides "capacity-dmips-mhz", track the fastest set, and call cpu_topology_set() with slow=true for any CPUs that are not the fastest. Bug fix for cpu_topology_set(): actually set ci_is_slow for slow CPUs. With this change, and -current's recent scheduler changes, long-running processes run on the faster cores. On RK3399-based systems, I am seeing 20-50% speedups for many tasks. XXX: all this can be made common with armv7 big.LITTLE. To generate a diff of this commit: cvs rdiff -u -r1.32 -r1.33 src/sys/arch/aarch64/aarch64/cpu.c cvs rdiff -u -r1.13 -r1.14 src/sys/arch/aarch64/aarch64/cpufunc.c cvs rdiff -u -r1.50 -r1.51 src/sys/arch/aarch64/aarch64/locore.S cvs rdiff -u -r1.17 -r1.18 src/sys/arch/aarch64/include/cpu.h cvs rdiff -u -r1.9 -r1.10 src/sys/arch/aarch64/include/cpufunc.h cvs rdiff -u -r1.30 -r1.31 src/sys/arch/arm/fdt/cpu_fdt.c cvs rdiff -u -r1.6 -r1.7 src/sys/kern/subr_cpu.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/aarch64/aarch64/cpu.c diff -u src/sys/arch/aarch64/aarch64/cpu.c:1.32 src/sys/arch/aarch64/aarch64/cpu.c:1.33 --- src/sys/arch/aarch64/aarch64/cpu.c:1.32 Thu Jan 9 16:23:41 2020 +++ src/sys/arch/aarch64/aarch64/cpu.c Sun Jan 12 09:29:18 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.c,v 1.32 2020/01/09 16:23:41 martin Exp $ */ +/* $NetBSD: cpu.c,v 1.33 2020/01/12 09:29:18 mrg Exp $ */ /* * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org> @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.32 2020/01/09 16:23:41 martin Exp $"); +__KERNEL_RCSID(1, "$NetBSD: cpu.c,v 1.33 2020/01/12 09:29:18 mrg Exp $"); #include "locators.h" #include "opt_arm_debug.h" @@ -65,6 +65,7 @@ static void cpu_identify1(device_t self, static void cpu_identify2(device_t self, struct cpu_info *); static void cpu_setup_id(struct cpu_info *); static void cpu_setup_sysctl(device_t, struct cpu_info *); +static void cpu_do_topology(struct cpu_info *); #ifdef MULTIPROCESSOR uint64_t cpu_mpidr[MAXCPUS]; @@ -144,9 +145,9 @@ cpu_attach(device_t dv, cpuid_t id) ci->ci_dev = dv; dv->dv_private = ci; - aarch64_gettopology(ci, ci->ci_id.ac_mpidr); - + cpu_do_topology(ci); cpu_identify(ci->ci_dev, ci); + #ifdef MULTIPROCESSOR if (unit != 0) { mi_cpu_attach(ci); @@ -308,8 +309,8 @@ cpu_identify2(device_t self, struct cpu_ dfr0 = reg_id_aa64dfr0_el1_read(); - aprint_debug_dev(self, "midr=0x%" PRIx32 "\n", - (uint32_t)ci->ci_id.ac_midr); + aprint_debug_dev(self, "midr=0x%" PRIx32 " mpidr=0x%" PRIx32 "\n", + (uint32_t)ci->ci_id.ac_midr, (uint32_t)ci->ci_id.ac_mpidr); aprint_normal_dev(self, "revID=0x%" PRIx64, id->ac_revidr); /* ID_AA64DFR0_EL1 */ @@ -497,6 +498,43 @@ cpu_setup_sysctl(device_t dv, struct cpu CTL_CREATE, CTL_EOL); } +static void +cpu_do_topology(struct cpu_info *newci) +{ + struct cpu_info *ci; + CPU_INFO_ITERATOR cii; + prop_dictionary_t dict; + uint32_t capacity_dmips_mhz; + static uint32_t best_cap = 0; + + dict = 
device_properties(newci->ci_dev); + if (prop_dictionary_get_uint32(dict, "capacity_dmips_mhz", + &capacity_dmips_mhz)) { + newci->ci_capacity_dmips_mhz = capacity_dmips_mhz; + } else { + newci->ci_capacity_dmips_mhz = 0; + } + + if (newci->ci_capacity_dmips_mhz > best_cap) + best_cap = newci->ci_capacity_dmips_mhz; + + /* + * CPU_INFO_FOREACH() doesn't work for this CPU until mi_cpu_attach() + * is called and ncpu is bumped, so call it directly here. + */ + aarch64_set_topology(newci, newci->ci_id.ac_mpidr, + newci->ci_capacity_dmips_mhz < best_cap); + + /* + * Using saved largest capacity, refresh previous topology info. + * It's supposed to be OK to re-set topology. + */ + for (CPU_INFO_FOREACH(cii, ci)) { + aarch64_set_topology(ci, ci->ci_id.ac_mpidr, + ci->ci_capacity_dmips_mhz < best_cap); + } +} + #ifdef MULTIPROCESSOR void cpu_boot_secondary_processors(void) Index: src/sys/arch/aarch64/aarch64/cpufunc.c diff -u src/sys/arch/aarch64/aarch64/cpufunc.c:1.13 src/sys/arch/aarch64/aarch64/cpufunc.c:1.14 --- src/sys/arch/aarch64/aarch64/cpufunc.c:1.13 Thu Jan 9 16:35:03 2020 +++ src/sys/arch/aarch64/aarch64/cpufunc.c Sun Jan 12 09:29:18 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: cpufunc.c,v 1.13 2020/01/09 16:35:03 ad Exp $ */ +/* $NetBSD: cpufunc.c,v 1.14 2020/01/12 09:29:18 mrg Exp $ */ /* * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org> @@ -29,7 +29,7 @@ #include "opt_multiprocessor.h" #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: cpufunc.c,v 1.13 2020/01/09 16:35:03 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpufunc.c,v 1.14 2020/01/12 09:29:18 mrg Exp $"); #include <sys/param.h> #include <sys/types.h> @@ -89,7 +89,7 @@ extract_cacheunit(int level, bool insn, } void -aarch64_gettopology(struct cpu_info * const ci, uint64_t mpidr) +aarch64_set_topology(struct cpu_info * const ci, uint64_t mpidr, bool slow) { if (mpidr & MPIDR_MT) { @@ -98,14 +98,14 @@ aarch64_gettopology(struct cpu_info * co __SHIFTOUT(mpidr, MPIDR_AFF1), __SHIFTOUT(mpidr, MPIDR_AFF0), 0, - false); + 
slow); } else { cpu_topology_set(ci, __SHIFTOUT(mpidr, MPIDR_AFF1), __SHIFTOUT(mpidr, MPIDR_AFF0), 0, 0, - false); + slow); } } Index: src/sys/arch/aarch64/aarch64/locore.S diff -u src/sys/arch/aarch64/aarch64/locore.S:1.50 src/sys/arch/aarch64/aarch64/locore.S:1.51 --- src/sys/arch/aarch64/aarch64/locore.S:1.50 Wed Jan 8 05:41:07 2020 +++ src/sys/arch/aarch64/aarch64/locore.S Sun Jan 12 09:29:18 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: locore.S,v 1.50 2020/01/08 05:41:07 ryo Exp $ */ +/* $NetBSD: locore.S,v 1.51 2020/01/12 09:29:18 mrg Exp $ */ /* * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org> @@ -38,7 +38,7 @@ #include <aarch64/hypervisor.h> #include "assym.h" -RCSID("$NetBSD: locore.S,v 1.50 2020/01/08 05:41:07 ryo Exp $") +RCSID("$NetBSD: locore.S,v 1.51 2020/01/12 09:29:18 mrg Exp $") #ifdef AARCH64_DEVICE_MEM_STRONGLY_ORDERED #define MAIR_DEVICE_MEM MAIR_DEVICE_nGnRnE @@ -187,7 +187,8 @@ vstart: /* get cache configuration */ mrs x0, tpidr_el1 /* curcpu */ mrs x1, mpidr_el1 - bl aarch64_gettopology + mov x2, #0 + bl aarch64_set_topology mov x0, xzr bl aarch64_getcacheinfo Index: src/sys/arch/aarch64/include/cpu.h diff -u src/sys/arch/aarch64/include/cpu.h:1.17 src/sys/arch/aarch64/include/cpu.h:1.18 --- src/sys/arch/aarch64/include/cpu.h:1.17 Sun Jan 5 20:17:43 2020 +++ src/sys/arch/aarch64/include/cpu.h Sun Jan 12 09:29:18 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.h,v 1.17 2020/01/05 20:17:43 ad Exp $ */ +/* $NetBSD: cpu.h,v 1.18 2020/01/12 09:29:18 mrg Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. 
@@ -93,6 +93,9 @@ struct cpu_info { struct evcnt ci_vfp_save; struct evcnt ci_vfp_release; + /* FDT or similar supplied "cpu capacity" */ + uint32_t ci_capacity_dmips_mhz; + /* interrupt controller */ u_int ci_gic_redist; /* GICv3 redistributor index */ uint64_t ci_gic_sgir; /* GICv3 SGIR target */ Index: src/sys/arch/aarch64/include/cpufunc.h diff -u src/sys/arch/aarch64/include/cpufunc.h:1.9 src/sys/arch/aarch64/include/cpufunc.h:1.10 --- src/sys/arch/aarch64/include/cpufunc.h:1.9 Thu Dec 19 09:47:42 2019 +++ src/sys/arch/aarch64/include/cpufunc.h Sun Jan 12 09:29:18 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: cpufunc.h,v 1.9 2019/12/19 09:47:42 ryo Exp $ */ +/* $NetBSD: cpufunc.h,v 1.10 2020/01/12 09:29:18 mrg Exp $ */ /* * Copyright (c) 2017 Ryo Shimizu <r...@nerv.org> @@ -69,7 +69,7 @@ extern u_int aarch64_cache_prefer_mask; extern u_int cputype; /* compat arm */ int set_cpufuncs(void); -void aarch64_gettopology(struct cpu_info *, uint64_t); +void aarch64_set_topology(struct cpu_info *, uint64_t, bool); void aarch64_getcacheinfo(int); void aarch64_printcacheinfo(device_t); Index: src/sys/arch/arm/fdt/cpu_fdt.c diff -u src/sys/arch/arm/fdt/cpu_fdt.c:1.30 src/sys/arch/arm/fdt/cpu_fdt.c:1.31 --- src/sys/arch/arm/fdt/cpu_fdt.c:1.30 Fri Nov 1 13:22:08 2019 +++ src/sys/arch/arm/fdt/cpu_fdt.c Sun Jan 12 09:29:18 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu_fdt.c,v 1.30 2019/11/01 13:22:08 bad Exp $ */ +/* $NetBSD: cpu_fdt.c,v 1.31 2020/01/12 09:29:18 mrg Exp $ */ /*- * Copyright (c) 2017 Jared McNeill <jmcne...@invisible.ca> @@ -30,7 +30,7 @@ #include "psci_fdt.h" #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: cpu_fdt.c,v 1.30 2019/11/01 13:22:08 bad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cpu_fdt.c,v 1.31 2020/01/12 09:29:18 mrg Exp $"); #include <sys/param.h> #include <sys/atomic.h> @@ -126,10 +126,21 @@ cpu_fdt_attach(device_t parent, device_t enum cpu_fdt_type type; bus_addr_t mpidr; cpuid_t cpuid; + const uint32_t *cap_ptr; + int len; sc->sc_dev = self; sc->sc_phandle = 
phandle; + cap_ptr = fdtbus_get_prop(phandle, "capacity-dmips-mhz", &len); + if (cap_ptr && len == 4) { + prop_dictionary_t dict = device_properties(self); + uint32_t capacity_dmips_mhz = be32toh(*cap_ptr); + + prop_dictionary_set_uint32(dict, "capacity_dmips_mhz", + capacity_dmips_mhz); + } + type = of_search_compatible(phandle, compat_data)->data; switch (type) { Index: src/sys/kern/subr_cpu.c diff -u src/sys/kern/subr_cpu.c:1.6 src/sys/kern/subr_cpu.c:1.7 --- src/sys/kern/subr_cpu.c:1.6 Thu Jan 9 16:35:03 2020 +++ src/sys/kern/subr_cpu.c Sun Jan 12 09:29:18 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: subr_cpu.c,v 1.6 2020/01/09 16:35:03 ad Exp $ */ +/* $NetBSD: subr_cpu.c,v 1.7 2020/01/12 09:29:18 mrg Exp $ */ /*- * Copyright (c) 2007, 2008, 2009, 2010, 2012, 2019, 2020 @@ -61,7 +61,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: subr_cpu.c,v 1.6 2020/01/09 16:35:03 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: subr_cpu.c,v 1.7 2020/01/12 09:29:18 mrg Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -152,6 +152,7 @@ cpu_topology_set(struct cpu_info *ci, u_ ci->ci_core_id = core_id; ci->ci_smt_id = smt_id; ci->ci_numa_id = numa_id; + ci->ci_is_slow = slow; for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) { ci->ci_sibling[rel] = ci; ci->ci_nsibling[rel] = 1;