Module Name: src Committed By: gutteridge Date: Tue Mar 12 02:26:16 UTC 2024
Modified Files: src/sys/arch/x86/x86: coretemp.c Log Message: coretemp.c: don't accept impossibly low TjMax values r. 1.39 introduced a regression where instead of applying a reasonable default maximum (as was done prior to that change), incorrect values were accepted and applied, as failures to retrieve an expected MSR value weren't accounted for. Apply different logic for unexpectedly low vs. high maximums, with distinct warnings for each. Also add another warning about a retrieval failure right at the outset (which also just uses the default, then). This change fundamentally doesn't address the fact that __SHIFTOUT(msr, MSR_TEMP_TARGET_READOUT) doesn't necessarily return a valid value. It just restores prior behaviour, which is more reasonable than applying a zero value, which started happening on some older hardware. (I infer this is most likely an issue with dated generations of Intel hardware with this feature.) The challenge is that this evidently isn't all documented properly anywhere. Various "magic values" in this driver need further investigation. While here, also fix output so warnings are cleanly formatted, rather than the slightly scrambled way they were appearing. Tested on older Intel hardware I had on hand: E7500 (now falls back to default 100 rather than 0) E5540 (successfully retrieves 97, as before) i5-3340M (successfully retrieves 105, as before) To generate a diff of this commit: cvs rdiff -u -r1.40 -r1.41 src/sys/arch/x86/x86/coretemp.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/x86/x86/coretemp.c diff -u src/sys/arch/x86/x86/coretemp.c:1.40 src/sys/arch/x86/x86/coretemp.c:1.41 --- src/sys/arch/x86/x86/coretemp.c:1.40 Thu Feb 29 01:59:11 2024 +++ src/sys/arch/x86/x86/coretemp.c Tue Mar 12 02:26:16 2024 @@ -1,4 +1,4 @@ -/* $NetBSD: coretemp.c,v 1.40 2024/02/29 01:59:11 gutteridge Exp $ */ +/* $NetBSD: coretemp.c,v 1.41 2024/03/12 02:26:16 gutteridge Exp $ */ /*- * Copyright (c) 2011 The NetBSD Foundation, Inc. @@ -61,7 +61,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: coretemp.c,v 1.40 2024/02/29 01:59:11 gutteridge Exp $"); +__KERNEL_RCSID(0, "$NetBSD: coretemp.c,v 1.41 2024/03/12 02:26:16 gutteridge Exp $"); #include <sys/param.h> #include <sys/device.h> @@ -110,7 +110,7 @@ static int coretemp_match(device_t, cfda static void coretemp_attach(device_t, device_t, void *); static int coretemp_detach(device_t, int); static int coretemp_quirks(struct cpu_info *); -static void coretemp_tjmax(device_t); +static int coretemp_tjmax(device_t); static void coretemp_refresh(struct sysmon_envsys *, envsys_data_t *); static void coretemp_refresh_xcall(void *, void *); @@ -194,9 +194,10 @@ coretemp_attach(device_t parent, device_ if (sysmon_envsys_register(sc->sc_sme) != 0) goto fail; - coretemp_tjmax(self); - aprint_verbose(", Tjmax=%d", sc->sc_tjmax); - aprint_normal("\n"); + if (coretemp_tjmax(self) == 0) { + aprint_verbose(", Tjmax=%d", sc->sc_tjmax); + aprint_normal("\n"); + } return; fail: @@ -258,7 +259,7 @@ coretemp_quirks(struct cpu_info *ci) return 1; } -void +static int coretemp_tjmax(device_t self) { struct coretemp_softc *sc = device_private(self); @@ -288,17 +289,19 @@ coretemp_tjmax(device_t self) if ((model < 0x17) && ((msr & __BIT(28)) == 0)) goto notee; - if (rdmsr_safe(MSR_IA32_EXT_CONFIG, &msr) == EFAULT) - return; + if (rdmsr_safe(MSR_IA32_EXT_CONFIG, &msr) == EFAULT) { + aprint_normal("\n"); + aprint_error_dev(sc->sc_dev, + "Failed to read MSR_IA32_EXT_CONFIG MSR. " + "Using default (%d)\n", sc->sc_tjmax); + return 1; + } - if ((msr & __BIT(30)) != 0) { + if ((msr & __BIT(30)) != 0) sc->sc_tjmax = 85; - return; - } } else if (model == 0x17 && stepping == 0x06) { /* The mobile Penryn family. */ sc->sc_tjmax = 105; - return; } else if (model == 0x1c) { if (stepping == 0x0a) { /* 45nm Atom D400, N400 and D500 series */ @@ -307,21 +310,39 @@ coretemp_tjmax(device_t self) sc->sc_tjmax = 90; } else { notee: - /* Attempt to get Tj(max) from IA32_TEMPERATURE_TARGET. */ + /* + * Attempt to get Tj(max) from IA32_TEMPERATURE_TARGET. + * It is not fully known which CPU models have the MSR. + */ if (rdmsr_safe(MSR_TEMPERATURE_TARGET, &msr) == EFAULT) { + aprint_normal("\n"); aprint_error_dev(sc->sc_dev, "Failed to read TEMPERATURE_TARGET MSR. " - "Use the default (%d)\n", sc->sc_tjmax); - return; + "Using default (%d)\n", sc->sc_tjmax); + return 1; } tjmax = __SHIFTOUT(msr, MSR_TEMP_TARGET_READOUT); - if ((tjmax < TJMAX_LIMIT_LOW) || (tjmax > TJMAX_LIMIT_HIGH)) + if (tjmax < TJMAX_LIMIT_LOW) { + aprint_normal("\n"); + aprint_error_dev(sc->sc_dev, + "WARNING: Tjmax(%d) retrieved was below expected range, " + "using default (%d).\n", tjmax, sc->sc_tjmax); + return 1; + } + + if (tjmax > TJMAX_LIMIT_HIGH) { + aprint_normal("\n"); aprint_error_dev(sc->sc_dev, "WARNING: Tjmax(%d) might exceed the limit.\n", tjmax); + sc->sc_tjmax = tjmax; + return 1; + } sc->sc_tjmax = tjmax; } + + return 0; } static void