Port temperatures have the most significant impact on the system thermal state and should be considered by the thermal algorithm. The thermal zone temperature reading is extended to read the port temperatures along with the chip temperature. The temperature value provided to the core thermal algorithm will be the accumulated value of the chip and port temperature sensing, normalized according to the basic constant thresholds.
Signed-off-by: Vadim Pasternak <vad...@mellanox.com> Acked-by: Jiri Pirko <j...@mellanox.com> --- drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 66 ++++++++++++++++++++-- 1 file changed, 62 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 65962ed..23d6197 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -109,6 +109,8 @@ struct mlxsw_thermal { u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1]; struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; enum thermal_device_mode mode; + int count; + int *ports_temp_cache; }; static inline u8 mlxsw_state_to_duty(int state) @@ -213,10 +215,11 @@ static int mlxsw_thermal_set_mode(struct thermal_zone_device *tzdev, return 0; } -static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, - int *p_temp) +static int mlxsw_thermal_init_temp(struct mlxsw_thermal *thermal, + struct mlxsw_env_temp_thresh *delta, + struct mlxsw_env_temp_multi *multi, + int *p_temp, bool *p_crit) { - struct mlxsw_thermal *thermal = tzdev->devdata; struct device *dev = thermal->bus_info->dev; char mtmp_pl[MLXSW_REG_MTMP_LEN]; unsigned int temp; @@ -231,10 +234,58 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, } mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); - *p_temp = (int) temp; + if (temp >= MLXSW_ENV_TEMP_CRIT) { + *p_crit = true; + } else if (temp < MLXSW_ENV_TEMP_NORM) { + multi->thresh.normal = temp; + delta->normal = MLXSW_ENV_TEMP_NORM - temp; + } else if (temp >= MLXSW_ENV_TEMP_HOT) { + multi->thresh.crit = temp; + delta->crit = temp - MLXSW_ENV_TEMP_HOT; + multi->mask |= MLXSW_ENV_CRIT_MASK; + } else { + multi->thresh.hot = temp; + delta->hot = temp - MLXSW_ENV_TEMP_NORM; + multi->mask |= MLXSW_ENV_HOT_MASK; + } + *p_temp = temp; + return 0; } +static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, + int 
*p_temp) +{ + struct mlxsw_thermal *thermal = tzdev->devdata; + struct device *dev = thermal->bus_info->dev; + struct mlxsw_env_temp_multi multi; + struct mlxsw_env_temp_thresh delta; + bool crit = false; + int err; + + memset(&multi, 0, sizeof(struct mlxsw_env_temp_multi)); + memset(&delta, 0, sizeof(struct mlxsw_env_temp_thresh)); + /* Read ASIC temperature */ + err = mlxsw_thermal_init_temp(thermal, &delta, &multi, + p_temp, &crit); + if (err) { + dev_err(dev, "Failed to query ASIC temp sensor\n"); + return err; + } + + /* No need to proceed ports temperature reading, since ASIC temperature + * should be resulted in system shutdown. + */ + if (crit) + return 0; + + /* Collect ports temperature */ + return mlxsw_env_collect_port_temp(thermal->core, + thermal->ports_temp_cache, + thermal->count, &multi, &delta, + NULL, p_temp); +} + static int mlxsw_thermal_get_trip_type(struct thermal_zone_device *tzdev, int trip, enum thermal_trip_type *p_type) @@ -436,6 +487,7 @@ int mlxsw_thermal_init(struct mlxsw_core *core, const struct mlxsw_bus_info *bus_info, struct mlxsw_thermal **p_thermal) { + unsigned int max_ports = mlxsw_core_max_ports(core); char mfcr_pl[MLXSW_REG_MFCR_LEN] = { 0 }; enum mlxsw_reg_mfcr_pwm_frequency freq; struct device *dev = bus_info->dev; @@ -452,6 +504,12 @@ int mlxsw_thermal_init(struct mlxsw_core *core, thermal->core = core; thermal->bus_info = bus_info; memcpy(thermal->trips, default_thermal_trips, sizeof(thermal->trips)); + thermal->ports_temp_cache = devm_kmalloc_array(dev, max_ports, + sizeof(int), + GFP_KERNEL); + if (!thermal->ports_temp_cache) + return -ENOMEM; + thermal->count = max_ports; err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfcr), mfcr_pl); if (err) { -- 2.1.4