[Dropping Lijun from CC due to a bounce]

On Fri, 2023-01-13 at 12:27 -0500, Alexander Motin wrote:
> Previous code reported "smart" state based on number of bits
> set in the module health field.  But actually any single bit
> set there already means critical failure.  Rework the logic
> according to specification, properly reporting non-critical
> state in case of warning threshold reached, critical in case
> of any module health bit set or error threshold reached and
> fatal if NVDIMM exhausted its life time.  In attempt to
> report the cause of failure in absence of better methods,
> report reached thresholds as more or less matching alarms.

This looks like unnecessarily aggressive wrapping. Commit messages are
typically wrapped at 72 chars.

Other than that this looks fine.

> 
> Signed-off-by: Alexander Motin <[email protected]>
> ---
>  ndctl/lib/msft.c | 55 ++++++++++++++++++++++++++----------------------
>  1 file changed, 30 insertions(+), 25 deletions(-)
> 
> diff --git a/ndctl/lib/msft.c b/ndctl/lib/msft.c
> index efc7fe1..8f66c97 100644
> --- a/ndctl/lib/msft.c
> +++ b/ndctl/lib/msft.c
> @@ -114,26 +114,32 @@ static unsigned int msft_cmd_smart_get_flags(struct 
> ndctl_cmd *cmd)
>  
>         /* below health data can be retrieved via MSFT _DSM function 11 */
>         return ND_SMART_HEALTH_VALID | ND_SMART_TEMP_VALID |
> -           ND_SMART_USED_VALID;
> +           ND_SMART_USED_VALID | ND_SMART_ALARM_VALID;
>  }
>  
> -static unsigned int num_set_bit_health(__u16 num)
> +static unsigned int msft_cmd_smart_get_health(struct ndctl_cmd *cmd)
>  {
> -       int i;
> -       __u16 n = num & 0x7FFF;
> -       unsigned int count = 0;
> +       unsigned int health = 0;
> +       int rc;
>  
> -       for (i = 0; i < 15; i++)
> -               if (!!(n & (1 << i)))
> -                       count++;
> +       rc = msft_smart_valid(cmd);
> +       if (rc < 0) {
> +               errno = -rc;
> +               return UINT_MAX;
> +       }
>  
> -       return count;
> +       if (CMD_MSFT_SMART(cmd)->nvm_lifetime == 0)
> +               health |= ND_SMART_FATAL_HEALTH;
> +       if (CMD_MSFT_SMART(cmd)->health != 0 ||
> +           CMD_MSFT_SMART(cmd)->err_thresh_stat != 0)
> +               health |= ND_SMART_CRITICAL_HEALTH;
> +       if (CMD_MSFT_SMART(cmd)->warn_thresh_stat != 0)
> +               health |= ND_SMART_NON_CRITICAL_HEALTH;
> +       return health;
>  }
>  
> -static unsigned int msft_cmd_smart_get_health(struct ndctl_cmd *cmd)
> +static unsigned int msft_cmd_smart_get_media_temperature(struct ndctl_cmd 
> *cmd)
>  {
> -       unsigned int health;
> -       unsigned int num;
>         int rc;
>  
>         rc = msft_smart_valid(cmd);
> @@ -142,21 +148,13 @@ static unsigned int msft_cmd_smart_get_health(struct 
> ndctl_cmd *cmd)
>                 return UINT_MAX;
>         }
>  
> -       num = num_set_bit_health(CMD_MSFT_SMART(cmd)->health);
> -       if (num == 0)
> -               health = 0;
> -       else if (num < 2)
> -               health = ND_SMART_NON_CRITICAL_HEALTH;
> -       else if (num < 3)
> -               health = ND_SMART_CRITICAL_HEALTH;
> -       else
> -               health = ND_SMART_FATAL_HEALTH;
> -
> -       return health;
> +       return CMD_MSFT_SMART(cmd)->temp * 16;
>  }
>  
> -static unsigned int msft_cmd_smart_get_media_temperature(struct ndctl_cmd 
> *cmd)
> +static unsigned int msft_cmd_smart_get_alarm_flags(struct ndctl_cmd *cmd)
>  {
> +       __u8 stat;
> +       unsigned int flags = 0;
>         int rc;
>  
>         rc = msft_smart_valid(cmd);
> @@ -165,7 +163,13 @@ static unsigned int 
> msft_cmd_smart_get_media_temperature(struct ndctl_cmd *cmd)
>                 return UINT_MAX;
>         }
>  
> -       return CMD_MSFT_SMART(cmd)->temp * 16;
> +       stat = CMD_MSFT_SMART(cmd)->err_thresh_stat |
> +           CMD_MSFT_SMART(cmd)->warn_thresh_stat;
> +       if (stat & 3) /* NVM_LIFETIME/ES_LIFETIME */
> +               flags |= ND_SMART_SPARE_TRIP;
> +       if (stat & 4) /* ES_TEMP */
> +               flags |= ND_SMART_CTEMP_TRIP;
> +       return flags;
>  }
>  
>  static unsigned int msft_cmd_smart_get_life_used(struct ndctl_cmd *cmd)
> @@ -209,6 +213,7 @@ struct ndctl_dimm_ops * const msft_dimm_ops = &(struct 
> ndctl_dimm_ops) {
>         .smart_get_flags = msft_cmd_smart_get_flags,
>         .smart_get_health = msft_cmd_smart_get_health,
>         .smart_get_media_temperature = msft_cmd_smart_get_media_temperature,
> +       .smart_get_alarm_flags = msft_cmd_smart_get_alarm_flags,
>         .smart_get_life_used = msft_cmd_smart_get_life_used,
>         .xlat_firmware_status = msft_cmd_xlat_firmware_status,
>  };

Reply via email to