Re: [RFC PATCH 2/3] powernv/mce: Print correct severity for mce error.

2019-03-29 Thread Mahesh Jagannath Salgaonkar
On 3/29/19 5:53 AM, Michael Ellerman wrote:
> Mahesh J Salgaonkar  writes:
>> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
>> index 8d0b1c24c636..314ed3f13d59 100644
>> --- a/arch/powerpc/include/asm/mce.h
>> +++ b/arch/powerpc/include/asm/mce.h
>> @@ -110,17 +110,18 @@ enum MCE_LinkErrorType {
>>  };
>>  
>>  struct machine_check_event {
>> -enum MCE_Version version:8;  /* 0x00 */
>> -uint8_t in_use; /* 0x01 */
>> -enum MCE_Severity   severity:8; /* 0x02 */
>> -enum MCE_Initiator  initiator:8; /* 0x03 */
>> -enum MCE_ErrorType  error_type:8;   /* 0x04 */
>> -enum MCE_Disposition disposition:8;  /* 0x05 */
>> -uint16_t cpu; /* 0x06 */
>> -uint64_t gpr3;   /* 0x08 */
>> -uint64_t srr0;   /* 0x10 */
>> -uint64_t srr1;   /* 0x18 */
>> -union { /* 0x20 */
>> +enum MCE_Version version:8;
>> +uint8_t in_use;
>> +enum MCE_Severity   severity:8;
>> +enum MCE_Initiator  initiator:8;
>> +enum MCE_ErrorType  error_type:8;
>> +enum MCE_Disposition disposition:8;
>> +uint8_t sync_error;
>> +uint16_t cpu;
>> +uint64_t gpr3;
>> +uint64_t srr0;
>> +uint64_t srr1;
> 
> Can you switch these to use kernel types while you're at it, i.e. u8, u64, etc.?

sure.

> 
>> @@ -194,6 +195,7 @@ struct mce_error_info {
>>  } u;
>>  enum MCE_Severity   severity:8;
>>  enum MCE_Initiator  initiator:8;
>> +uint8_t sync_error;
> 
> u8 here but bool later?

Will make it bool everywhere.

Thanks,
-Mahesh.



Re: [RFC PATCH 2/3] powernv/mce: Print correct severity for mce error.

2019-03-28 Thread Michael Ellerman
Mahesh J Salgaonkar  writes:
> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> index 8d0b1c24c636..314ed3f13d59 100644
> --- a/arch/powerpc/include/asm/mce.h
> +++ b/arch/powerpc/include/asm/mce.h
> @@ -110,17 +110,18 @@ enum MCE_LinkErrorType {
>  };
>  
>  struct machine_check_event {
> - enum MCE_Version version:8;  /* 0x00 */
> - uint8_t in_use; /* 0x01 */
> - enum MCE_Severity   severity:8; /* 0x02 */
> - enum MCE_Initiator  initiator:8; /* 0x03 */
> - enum MCE_ErrorType  error_type:8;   /* 0x04 */
> - enum MCE_Disposition disposition:8;  /* 0x05 */
> - uint16_t cpu; /* 0x06 */
> - uint64_t gpr3;   /* 0x08 */
> - uint64_t srr0;   /* 0x10 */
> - uint64_t srr1;   /* 0x18 */
> - union { /* 0x20 */
> + enum MCE_Version version:8;
> + uint8_t in_use;
> + enum MCE_Severity   severity:8;
> + enum MCE_Initiator  initiator:8;
> + enum MCE_ErrorType  error_type:8;
> + enum MCE_Disposition disposition:8;
> + uint8_t sync_error;
> + uint16_t cpu;
> + uint64_t gpr3;
> + uint64_t srr0;
> + uint64_t srr1;

Can you switch these to use kernel types while you're at it, i.e. u8, u64, etc.?

> @@ -194,6 +195,7 @@ struct mce_error_info {
>   } u;
>   enum MCE_Severity   severity:8;
>   enum MCE_Initiator  initiator:8;
> + uint8_t sync_error;

u8 here but bool later?

> diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
> index 6b800eec31f2..06161de19060 100644
> --- a/arch/powerpc/kernel/mce_power.c
> +++ b/arch/powerpc/kernel/mce_power.c
> @@ -133,106 +133,107 @@ struct mce_ierror_table {
>   unsigned int error_subtype;
>   unsigned int initiator;
>   unsigned int severity;
> + bool sync_error;
>  };

ie. here it's a bool?

> @@ -539,8 +543,9 @@ static int mce_handle_derror(struct pt_regs *regs,
>   return handled;
>  
>   mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
> - mce_err->severity = MCE_SEV_ERROR_SYNC;
> + mce_err->severity = MCE_SEV_SEVERE;
>   mce_err->initiator = MCE_INITIATOR_CPU;
> + mce_err->sync_error = 1;

u8 or bool?

cheers


[RFC PATCH 2/3] powernv/mce: Print correct severity for mce error.

2019-03-22 Thread Mahesh J Salgaonkar
From: Mahesh Salgaonkar 

Currently all machine check errors are printed as severe errors, which isn't
correct. Print soft errors as warnings instead of severe errors.

Signed-off-by: Mahesh Salgaonkar 
---
 arch/powerpc/include/asm/mce.h|   26 +++---
 arch/powerpc/kernel/mce.c |5 +
 arch/powerpc/kernel/mce_power.c   |  143 +
 arch/powerpc/platforms/powernv/opal.c |2 
 4 files changed, 92 insertions(+), 84 deletions(-)

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 8d0b1c24c636..314ed3f13d59 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -31,7 +31,7 @@ enum MCE_Version {
 enum MCE_Severity {
MCE_SEV_NO_ERROR = 0,
MCE_SEV_WARNING = 1,
-   MCE_SEV_ERROR_SYNC = 2,
+   MCE_SEV_SEVERE = 2,
MCE_SEV_FATAL = 3,
 };
 
@@ -110,17 +110,18 @@ enum MCE_LinkErrorType {
 };
 
 struct machine_check_event {
-   enum MCE_Version    version:8;  /* 0x00 */
-   uint8_t in_use; /* 0x01 */
-   enum MCE_Severity   severity:8; /* 0x02 */
-   enum MCE_Initiator  initiator:8; /* 0x03 */
-   enum MCE_ErrorType  error_type:8;   /* 0x04 */
-   enum MCE_Disposition    disposition:8;  /* 0x05 */
-   uint16_t    cpu; /* 0x06 */
-   uint64_t    gpr3;   /* 0x08 */
-   uint64_t    srr0;   /* 0x10 */
-   uint64_t    srr1;   /* 0x18 */
-   union { /* 0x20 */
+   enum MCE_Version    version:8;
+   uint8_t in_use;
+   enum MCE_Severity   severity:8;
+   enum MCE_Initiator  initiator:8;
+   enum MCE_ErrorType  error_type:8;
+   enum MCE_Disposition    disposition:8;
+   uint8_t sync_error;
+   uint16_t    cpu;
+   uint64_t    gpr3;
+   uint64_t    srr0;
+   uint64_t    srr1;
+   union {
struct {
enum MCE_UeErrorType ue_error_type:8;
uint8_t effective_address_provided;
@@ -194,6 +195,7 @@ struct mce_error_info {
} u;
enum MCE_Severity   severity:8;
enum MCE_Initiator  initiator:8;
+   uint8_t sync_error;
 };
 
 #define MAX_MC_EVT 100
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 44614462cb34..588a280a8a4a 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -122,6 +122,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
 
mce->initiator = mce_err->initiator;
mce->severity = mce_err->severity;
+   mce->sync_error = mce_err->sync_error;
 
/*
 * Populate the mce error_type and type-specific error_type.
@@ -374,9 +375,9 @@ void machine_check_print_event_info(struct machine_check_event *evt,
break;
case MCE_SEV_WARNING:
level = KERN_WARNING;
-   sevstr = "";
+   sevstr = "Warning";
break;
-   case MCE_SEV_ERROR_SYNC:
+   case MCE_SEV_SEVERE:
level = KERN_ERR;
sevstr = "Severe";
break;
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 6b800eec31f2..06161de19060 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -133,106 +133,107 @@ struct mce_ierror_table {
unsigned int error_subtype;
unsigned int initiator;
unsigned int severity;
+   bool sync_error;
 };
 
 static const struct mce_ierror_table mce_p7_ierror_table[] = {
 { 0x001c, 0x0004, true,
   MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x001c, 0x0008, true,
   MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x001c, 0x000c, true,
   MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
 { 0x001c, 0x0010, true,
   MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
 { 0x001c, 0x0014, true,
   MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
 { 0x001c, 0x0018, true,
   MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
-  MCE_INITIATOR_CPU,  MCE_SEV_ERROR_SYNC, },
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0x001c,