Module Name:    src
Committed By:   msaitoh
Date:           Thu Mar 10 04:14:34 UTC 2022

Modified Files:
        src/sys/dev/pci/ixgbe: ixgbe.c ixgbe.h

Log Message:
Print ECC, PHY and temp error log using with ratecheck().

 - The ratecheck() is for just in case. All of the interrupts might occur
   only once, but I don't know whether it's true or not. For the fan failure,
   it seems it occurs only once.
 - All of the interval is 60s.


To generate a diff of this commit:
cvs rdiff -u -r1.311 -r1.312 src/sys/dev/pci/ixgbe/ixgbe.c
cvs rdiff -u -r1.84 -r1.85 src/sys/dev/pci/ixgbe/ixgbe.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/pci/ixgbe/ixgbe.c
diff -u src/sys/dev/pci/ixgbe/ixgbe.c:1.311 src/sys/dev/pci/ixgbe/ixgbe.c:1.312
--- src/sys/dev/pci/ixgbe/ixgbe.c:1.311	Thu Mar 10 04:00:32 2022
+++ src/sys/dev/pci/ixgbe/ixgbe.c	Thu Mar 10 04:14:34 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.c,v 1.311 2022/03/10 04:00:32 msaitoh Exp $ */
+/* $NetBSD: ixgbe.c,v 1.312 2022/03/10 04:14:34 msaitoh Exp $ */
 
 /******************************************************************************
 
@@ -64,7 +64,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ixgbe.c,v 1.311 2022/03/10 04:00:32 msaitoh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ixgbe.c,v 1.312 2022/03/10 04:14:34 msaitoh Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_inet.h"
@@ -424,6 +424,9 @@ static int (*ixgbe_ring_empty)(struct if
 #endif
 #define IXGBE_WORKQUEUE_PRI PRI_SOFTNET
 
+/* Interval between reports of errors */
+static const struct timeval ixgbe_errlog_intrvl = { 60, 0 };	/* 60s */
+
 /************************************************************************
  * ixgbe_initialize_rss_mapping
  ************************************************************************/
@@ -3230,10 +3233,10 @@ ixgbe_intr_admin_common(struct adapter *
 #endif
 
 		if (eicr & IXGBE_EICR_ECC) {
-			device_printf(adapter->dev,
-			    "CRITICAL: ECC ERROR!! Please Reboot!!\n");
-			/* Disable interrupt to prevent log spam */
-			*eims_disable |= IXGBE_EICR_ECC;
+			if (ratecheck(&adapter->lasterr_time,
+			    &ixgbe_errlog_intrvl))
+				device_printf(adapter->dev,
+				    "CRITICAL: ECC ERROR!! Please Reboot!!\n");
 		}
 
 		/* Check for over temp condition */
@@ -3242,32 +3245,32 @@ ixgbe_intr_admin_common(struct adapter *
 			case ixgbe_mac_X550EM_a:
 				if (!(eicr & IXGBE_EICR_GPI_SDP0_X550EM_a))
 					break;
-				/* Disable interrupt to prevent log spam */
-				*eims_disable |= IXGBE_EICR_GPI_SDP0_X550EM_a;
-
 				retval = hw->phy.ops.check_overtemp(hw);
 				if (retval != IXGBE_ERR_OVERTEMP)
 					break;
-				device_printf(adapter->dev,
-				    "CRITICAL: OVER TEMP!! "
-				    "PHY IS SHUT DOWN!!\n");
-				device_printf(adapter->dev,
-				    "System shutdown required!\n");
+				if (ratecheck(&adapter->lasterr_time,
+				    &ixgbe_errlog_intrvl)) {
+					device_printf(adapter->dev,
+					    "CRITICAL: OVER TEMP!! "
+					    "PHY IS SHUT DOWN!!\n");
+					device_printf(adapter->dev,
+					    "System shutdown required!\n");
+				}
 				break;
 			default:
 				if (!(eicr & IXGBE_EICR_TS))
 					break;
-				/* Disable interrupt to prevent log spam */
-				*eims_disable |= IXGBE_EIMS_TS;
-
 				retval = hw->phy.ops.check_overtemp(hw);
 				if (retval != IXGBE_ERR_OVERTEMP)
 					break;
-				device_printf(adapter->dev,
-				    "CRITICAL: OVER TEMP!! "
-				    "PHY IS SHUT DOWN!!\n");
-				device_printf(adapter->dev,
-				    "System shutdown required!\n");
+				if (ratecheck(&adapter->lasterr_time,
+				    &ixgbe_errlog_intrvl)) {
+					device_printf(adapter->dev,
+					    "CRITICAL: OVER TEMP!! "
+					    "PHY IS SHUT DOWN!!\n");
+					device_printf(adapter->dev,
+					    "System shutdown required!\n");
+				}
 				break;
 			}
 		}
@@ -3281,13 +3284,8 @@ ixgbe_intr_admin_common(struct adapter *
 	}
 
 	/* Check for fan failure */
-	if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL) {
-		retval = ixgbe_check_fan_failure(adapter, eicr, true);
-		if (retval == IXGBE_ERR_FAN_FAILURE) {
-			/* Disable interrupt to prevent log spam */
-			*eims_disable |= IXGBE_EIMS_GPI_SDP1_BY_MAC(hw);
-		}
-	}
+	if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL)
+		ixgbe_check_fan_failure(adapter, eicr, true);
 
 	/* External PHY interrupt */
 	if ((hw->phy.type == ixgbe_phy_x550em_ext_t) &&
@@ -6609,13 +6607,20 @@ ixgbe_check_fan_failure(struct adapter *
 	mask = (in_interrupt) ? IXGBE_EICR_GPI_SDP1_BY_MAC(&adapter->hw) :
 	    IXGBE_ESDP_SDP1;
 
-	if (reg & mask) {
+	if ((reg & mask) == 0)
+		return IXGBE_SUCCESS;
+
+	/*
+	 * Use ratecheck() just in case interrupt occur frequently.
+	 * When EXPX9501AT's fan stopped, interrupt occurred only once,
+	 * an red LED on the board turned on and link never up until
+	 * power off.
+	 */
+	if (ratecheck(&adapter->lasterr_time, &ixgbe_errlog_intrvl))
 		device_printf(adapter->dev,
 		    "\nCRITICAL: FAN FAILURE!! REPLACE IMMEDIATELY!!\n");
-		return IXGBE_ERR_FAN_FAILURE;
-	}
 
-	return IXGBE_SUCCESS;
+	return IXGBE_ERR_FAN_FAILURE;
 } /* ixgbe_check_fan_failure */
 
 /************************************************************************

Index: src/sys/dev/pci/ixgbe/ixgbe.h
diff -u src/sys/dev/pci/ixgbe/ixgbe.h:1.84 src/sys/dev/pci/ixgbe/ixgbe.h:1.85
--- src/sys/dev/pci/ixgbe/ixgbe.h:1.84	Fri Dec 10 11:33:11 2021
+++ src/sys/dev/pci/ixgbe/ixgbe.h	Thu Mar 10 04:14:34 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.h,v 1.84 2021/12/10 11:33:11 msaitoh Exp $ */
+/* $NetBSD: ixgbe.h,v 1.85 2022/03/10 04:14:34 msaitoh Exp $ */
 
 /******************************************************************************
   SPDX-License-Identifier: BSD-3-Clause
@@ -645,6 +645,7 @@ struct adapter {
 
 	struct sysctllog	*sysctllog;
 	const struct sysctlnode *sysctltop;
+	struct timeval		lasterr_time;
 };
 
 /* Precision Time Sync (IEEE 1588) defines */

Reply via email to