Module Name:    src
Committed By:   gutteridge
Date:           Tue Mar 12 02:26:16 UTC 2024

Modified Files:
        src/sys/arch/x86/x86: coretemp.c

Log Message:
coretemp.c: don't accept impossibly low TjMax values

r. 1.39 introduced a regression where instead of applying a reasonable
default maximum (as was done prior to that change), incorrect values
were accepted and applied, as failures to retrieve an expected MSR
value weren't accounted for.

Apply different logic for unexpectedly low vs. high maximums, with
distinct warnings for each. Also add another warning about a retrieval
failure right at the outset (which also just uses the default, then).

This change fundamentally doesn't address the fact that
__SHIFTOUT(msr, MSR_TEMP_TARGET_READOUT)
doesn't necessarily return a valid value. It just restores prior
behaviour, which is more reasonable than applying a zero value, which
started happening on some older hardware. (I infer this is most likely
an issue with dated generations of Intel hardware with this feature.)
The challenge is that this evidently isn't all documented properly
anywhere. Various "magic values" in this driver need further
investigation.

While here, also fix output so warnings are cleanly formatted, rather
than the slightly scrambled way they were appearing.

Tested on older Intel hardware I had on hand:
E7500 (now falls back to default 100 rather than 0)
E5540 (successfully retrieves 97, as before)
i5-3340M (successfully retrieves 105, as before)


To generate a diff of this commit:
cvs rdiff -u -r1.40 -r1.41 src/sys/arch/x86/x86/coretemp.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/x86/x86/coretemp.c
diff -u src/sys/arch/x86/x86/coretemp.c:1.40 src/sys/arch/x86/x86/coretemp.c:1.41
--- src/sys/arch/x86/x86/coretemp.c:1.40	Thu Feb 29 01:59:11 2024
+++ src/sys/arch/x86/x86/coretemp.c	Tue Mar 12 02:26:16 2024
@@ -1,4 +1,4 @@
-/* $NetBSD: coretemp.c,v 1.40 2024/02/29 01:59:11 gutteridge Exp $ */
+/* $NetBSD: coretemp.c,v 1.41 2024/03/12 02:26:16 gutteridge Exp $ */
 
 /*-
  * Copyright (c) 2011 The NetBSD Foundation, Inc.
@@ -61,7 +61,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: coretemp.c,v 1.40 2024/02/29 01:59:11 gutteridge Exp $");
+__KERNEL_RCSID(0, "$NetBSD: coretemp.c,v 1.41 2024/03/12 02:26:16 gutteridge Exp $");
 
 #include <sys/param.h>
 #include <sys/device.h>
@@ -110,7 +110,7 @@ static int	coretemp_match(device_t, cfda
 static void	coretemp_attach(device_t, device_t, void *);
 static int	coretemp_detach(device_t, int);
 static int	coretemp_quirks(struct cpu_info *);
-static void	coretemp_tjmax(device_t);
+static int	coretemp_tjmax(device_t);
 static void	coretemp_refresh(struct sysmon_envsys *, envsys_data_t *);
 static void	coretemp_refresh_xcall(void *, void *);
 
@@ -194,9 +194,10 @@ coretemp_attach(device_t parent, device_
 	if (sysmon_envsys_register(sc->sc_sme) != 0)
 		goto fail;
 
-	coretemp_tjmax(self);
-	aprint_verbose(", Tjmax=%d", sc->sc_tjmax);
-	aprint_normal("\n");
+	if (coretemp_tjmax(self) == 0) {
+		aprint_verbose(", Tjmax=%d", sc->sc_tjmax);
+		aprint_normal("\n");
+	}
 	return;
 
 fail:
@@ -258,7 +259,7 @@ coretemp_quirks(struct cpu_info *ci)
 	return 1;
 }
 
-void
+static int
 coretemp_tjmax(device_t self)
 {
 	struct coretemp_softc *sc = device_private(self);
@@ -288,17 +289,19 @@ coretemp_tjmax(device_t self)
 		if ((model < 0x17) && ((msr & __BIT(28)) == 0))
 			goto notee;
 
-		if (rdmsr_safe(MSR_IA32_EXT_CONFIG, &msr) == EFAULT)
-			return;
+		if (rdmsr_safe(MSR_IA32_EXT_CONFIG, &msr) == EFAULT) {
+			aprint_normal("\n");
+			aprint_error_dev(sc->sc_dev,
+			    "Failed to read MSR_IA32_EXT_CONFIG MSR. "
+			    "Using default (%d)\n", sc->sc_tjmax);
+			return 1;
+		}
 
-		if ((msr & __BIT(30)) != 0) {
+		if ((msr & __BIT(30)) != 0)
 			sc->sc_tjmax = 85;
-			return;
-		}
 	} else if (model == 0x17 && stepping == 0x06) {
 		/* The mobile Penryn family. */
 		sc->sc_tjmax = 105;
-		return;
 	} else if (model == 0x1c) {
 		if (stepping == 0x0a) {
 			/* 45nm Atom D400, N400 and D500 series */
@@ -307,21 +310,39 @@ coretemp_tjmax(device_t self)
 			sc->sc_tjmax = 90;
 	} else {
 notee:
-		/* Attempt to get Tj(max) from IA32_TEMPERATURE_TARGET. */
+		/* 
+		 * Attempt to get Tj(max) from IA32_TEMPERATURE_TARGET.
+		 * It is not fully known which CPU models have the MSR.
+		 */
 		if (rdmsr_safe(MSR_TEMPERATURE_TARGET, &msr) == EFAULT) {
+			aprint_normal("\n");
 			aprint_error_dev(sc->sc_dev,
 			    "Failed to read TEMPERATURE_TARGET MSR. "
-			    "Use the default (%d)\n", sc->sc_tjmax);
-			return;
+			    "Using default (%d)\n", sc->sc_tjmax);
+			return 1;
 		}
 
 		tjmax = __SHIFTOUT(msr, MSR_TEMP_TARGET_READOUT);
-		if ((tjmax < TJMAX_LIMIT_LOW) || (tjmax > TJMAX_LIMIT_HIGH))
+		if (tjmax < TJMAX_LIMIT_LOW) {
+			aprint_normal("\n");
+			aprint_error_dev(sc->sc_dev,
+			    "WARNING: Tjmax(%d) retrieved was below expected range, "
+				"using default (%d).\n", tjmax, sc->sc_tjmax);
+			return 1;
+		}
+
+		if (tjmax > TJMAX_LIMIT_HIGH) {
+			aprint_normal("\n");
 			aprint_error_dev(sc->sc_dev,
 			    "WARNING: Tjmax(%d) might exceed the limit.\n",
 			    tjmax);
+			sc->sc_tjmax = tjmax;
+			return 1;
+		}
 		sc->sc_tjmax = tjmax;
 	}
+
+	return 0;
 }
 
 static void

Reply via email to