iwx(4) hardware can monitor device temperature and notify the driver when a critical temperature has been reached.
This patch enables the mechanism. The driver will now turn the device off and print a message to dmesg if the firmware signals a critical temperature. Comments in the Linux source code (reproduced in this patch) imply that firmware will also make use of a Tx-backoff mechanism to regulate temperature. I have been doing driver development and testing for a while now with this patch in place and have seen no visible downsides. I've not seen a critical temperature notification yet and I'd rather not try to provoke one since I only have one device :-) ok? diff 46dd352d1f9381c0e7702f0426f195b14c3d66d8 a6511f65cdcb27cc87ec2022a72e2600f0ab8969 blob - c44544816b57a627e8b071c1ebcb8408742d9f8c blob + ca6d9fead056a08b48f61b30f2107afa68ebcda3 --- sys/dev/pci/if_iwx.c +++ sys/dev/pci/if_iwx.c @@ -437,6 +437,7 @@ int iwx_sf_config(struct iwx_softc *, int); int iwx_send_bt_init_conf(struct iwx_softc *); int iwx_send_soc_conf(struct iwx_softc *); int iwx_send_update_mcc_cmd(struct iwx_softc *, const char *); +int iwx_send_temp_report_ths_cmd(struct iwx_softc *); int iwx_init_hw(struct iwx_softc *); int iwx_init(struct ifnet *); void iwx_start(struct ifnet *); @@ -6813,6 +6814,29 @@ out: } int +iwx_send_temp_report_ths_cmd(struct iwx_softc *sc) +{ + struct iwx_temp_report_ths_cmd cmd; + int err; + + /* + * In order to give responsibility for critical-temperature-kill + * and TX backoff to FW we need to send an empty temperature + * reporting command at init time. 
+ */ + memset(&cmd, 0, sizeof(cmd)); + + err = iwx_send_cmd_pdu(sc, + IWX_WIDE_ID(IWX_PHY_OPS_GROUP, IWX_TEMP_REPORTING_THRESHOLDS_CMD), + 0, sizeof(cmd), &cmd); + if (err) + printf("%s: TEMP_REPORT_THS_CMD command failed (error %d)\n", + DEVNAME(sc), err); + + return err; +} + +int iwx_init_hw(struct iwx_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; @@ -6896,6 +6920,12 @@ iwx_init_hw(struct iwx_softc *sc) DEVNAME(sc), err); } + if (isset(sc->sc_enabled_capa, IWX_UCODE_TLV_CAPA_CT_KILL_BY_FW)) { + err = iwx_send_temp_report_ths_cmd(sc); + if (err) + goto err; + } + err = iwx_power_update_device(sc); if (err) { printf("%s: could not send power command (error %d)\n", @@ -7614,7 +7644,21 @@ iwx_rx_pkt(struct iwx_softc *sc, struct iwx_rx_data *d case IWX_DTS_MEASUREMENT_NOTIFICATION: case IWX_WIDE_ID(IWX_PHY_OPS_GROUP, IWX_DTS_MEASUREMENT_NOTIF_WIDE): + case IWX_WIDE_ID(IWX_PHY_OPS_GROUP, + IWX_TEMP_REPORTING_THRESHOLDS_CMD): break; + + case IWX_WIDE_ID(IWX_PHY_OPS_GROUP, + IWX_CT_KILL_NOTIFICATION): { + struct iwx_ct_kill_notif *notif; + SYNC_RESP_STRUCT(notif, pkt); + printf("%s: device at critical temperature (%u degC), " + "stopping device\n", + DEVNAME(sc), le16toh(notif->temperature)); + sc->sc_flags |= IWX_FLAG_HW_ERR; + task_add(systq, &sc->init_task); + break; + } case IWX_WIDE_ID(IWX_REGULATORY_AND_NVM_GROUP, IWX_NVM_GET_INFO): blob - 0892fd4e61f3ce4d57c2770395a07c13c4386a8e blob + 74f17f31c85ca3a67b3fcbc1699a167d04ea6c6c --- sys/dev/pci/if_iwxreg.h +++ sys/dev/pci/if_iwxreg.h @@ -1685,6 +1685,31 @@ struct iwx_phy_cfg_cmd { #define IWX_PHY_CFG_RX_CHAIN_B (1 << 13) #define IWX_PHY_CFG_RX_CHAIN_C (1 << 14) +#define IWX_MAX_DTS_TRIPS 8 + +/** + * struct iwx_ct_kill_notif - CT-kill entry notification + * + * @temperature: the current temperature in celsius + * @reserved: reserved + */ +struct iwx_ct_kill_notif { + uint16_t temperature; + uint16_t reserved; +} __packed; /* GRP_PHY_CT_KILL_NTF */ + +/** + * struct iwx_temp_report_ths_cmd - set temperature 
thresholds + * (IWX_TEMP_REPORTING_THRESHOLDS_CMD) + * + * @num_temps: number of temperature thresholds passed + * @thresholds: array with the thresholds to be configured + */ +struct iwx_temp_report_ths_cmd { + uint32_t num_temps; + uint16_t thresholds[IWX_MAX_DTS_TRIPS]; +} __packed; /* GRP_PHY_TEMP_REPORTING_THRESHOLDS_CMD */ + #define IWX_NVM_VERSION 0 /* 8k family NVM HW-Section offset (in words) definitions */