Re: [Qemu-devel] [PATCH v10 4/6] target/ppc: Build rtas error log upon an MCE

2019-07-03 Thread Aravinda Prasad



On Wednesday 03 July 2019 08:37 AM, David Gibson wrote:
> On Tue, Jul 02, 2019 at 03:19:24PM +0530, Aravinda Prasad wrote:
>>
>>
>> On Tuesday 02 July 2019 09:33 AM, David Gibson wrote:
>>> On Wed, Jun 12, 2019 at 02:51:21PM +0530, Aravinda Prasad wrote:
 Upon a machine check exception (MCE) in a guest address space,
 KVM causes a guest exit to enable QEMU to build and pass the
 error to the guest in the PAPR defined rtas error log format.

 This patch builds the rtas error log, copies it to the rtas_addr
 and then invokes the guest registered machine check handler. The
 handler in the guest takes suitable action(s) depending on the type
 and criticality of the error. For example, if an error is
 unrecoverable memory corruption in an application inside the
 guest, then the guest kernel sends a SIGBUS to the application.
 For recoverable errors, the guest performs recovery actions and
 logs the error.

 Signed-off-by: Aravinda Prasad 
 ---
  hw/ppc/spapr.c |   13 +++
  hw/ppc/spapr_events.c  |  238 
 
  hw/ppc/spapr_rtas.c|   26 +
  include/hw/ppc/spapr.h |6 +
  target/ppc/kvm.c   |4 +
  5 files changed, 284 insertions(+), 3 deletions(-)

 diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
 index 6cc2c3b..d61905b 100644
 --- a/hw/ppc/spapr.c
 +++ b/hw/ppc/spapr.c
 @@ -2908,6 +2908,19 @@ static void spapr_machine_init(MachineState 
 *machine)
  error_report("Could not get size of LPAR rtas '%s'", filename);
  exit(1);
  }
 +
 +if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
 +/*
 + * Ensure that the rtas image size is less than 
 RTAS_ERROR_LOG_OFFSET
 + * or else the rtas image will be overwritten with the rtas error 
 log
 + * when a machine check exception is encountered.
 + */
 +g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);
 +
 +/* Resize rtas blob to accommodate error log */
 +spapr->rtas_size = RTAS_ERROR_LOG_MAX;
 +}
 +
  spapr->rtas_blob = g_malloc(spapr->rtas_size);
  if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 
 0) {
  error_report("Could not load LPAR rtas '%s'", filename);
 diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
 index a0c66d7..51c052e 100644
 --- a/hw/ppc/spapr_events.c
 +++ b/hw/ppc/spapr_events.c
 @@ -212,6 +212,106 @@ struct hp_extended_log {
  struct rtas_event_log_v6_hp hp;
  } QEMU_PACKED;
  
 +struct rtas_event_log_v6_mc {
 +#define RTAS_LOG_V6_SECTION_ID_MC   0x4D43 /* MC */
 +struct rtas_event_log_v6_section_header hdr;
 +uint32_t fru_id;
 +uint32_t proc_id;
 +uint8_t error_type;
 +#define RTAS_LOG_V6_MC_TYPE_UE   0
 +#define RTAS_LOG_V6_MC_TYPE_SLB  1
 +#define RTAS_LOG_V6_MC_TYPE_ERAT 2
 +#define RTAS_LOG_V6_MC_TYPE_TLB  4
 +#define RTAS_LOG_V6_MC_TYPE_D_CACHE  5
 +#define RTAS_LOG_V6_MC_TYPE_I_CACHE  7
 +uint8_t sub_err_type;
 +#define RTAS_LOG_V6_MC_UE_INDETERMINATE  0
 +#define RTAS_LOG_V6_MC_UE_IFETCH 1
 +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2
 +#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3
 +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4
 +#define RTAS_LOG_V6_MC_SLB_PARITY0
 +#define RTAS_LOG_V6_MC_SLB_MULTIHIT  1
 +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2
 +#define RTAS_LOG_V6_MC_ERAT_PARITY   1
 +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2
 +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE3
 +#define RTAS_LOG_V6_MC_TLB_PARITY1
 +#define RTAS_LOG_V6_MC_TLB_MULTIHIT  2
 +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3
 +uint8_t reserved_1[6];
 +uint64_t effective_address;
 +uint64_t logical_address;
 +} QEMU_PACKED;
 +
 +struct mc_extended_log {
 +struct rtas_event_log_v6 v6hdr;
 +struct rtas_event_log_v6_mc mc;
 +} QEMU_PACKED;
 +
 +struct MC_ierror_table {
 +unsigned long srr1_mask;
 +unsigned long srr1_value;
 +bool nip_valid; /* nip is a valid indicator of faulting address */
 +uint8_t error_type;
 +uint8_t error_subtype;
 +unsigned int initiator;
 +unsigned int severity;
 +};
 +
 +static const struct MC_ierror_table mc_ierror_table[] = {
 +{ 0x000

Re: [Qemu-devel] [PATCH v10 4/6] target/ppc: Build rtas error log upon an MCE

2019-07-02 Thread David Gibson
On Tue, Jul 02, 2019 at 03:19:24PM +0530, Aravinda Prasad wrote:
> 
> 
> On Tuesday 02 July 2019 09:33 AM, David Gibson wrote:
> > On Wed, Jun 12, 2019 at 02:51:21PM +0530, Aravinda Prasad wrote:
> >> Upon a machine check exception (MCE) in a guest address space,
> >> KVM causes a guest exit to enable QEMU to build and pass the
> >> error to the guest in the PAPR defined rtas error log format.
> >>
> >> This patch builds the rtas error log, copies it to the rtas_addr
> >> and then invokes the guest registered machine check handler. The
> >> handler in the guest takes suitable action(s) depending on the type
> >> and criticality of the error. For example, if an error is
> >> unrecoverable memory corruption in an application inside the
> >> guest, then the guest kernel sends a SIGBUS to the application.
> >> For recoverable errors, the guest performs recovery actions and
> >> logs the error.
> >>
> >> Signed-off-by: Aravinda Prasad 
> >> ---
> >>  hw/ppc/spapr.c |   13 +++
> >>  hw/ppc/spapr_events.c  |  238 
> >> 
> >>  hw/ppc/spapr_rtas.c|   26 +
> >>  include/hw/ppc/spapr.h |6 +
> >>  target/ppc/kvm.c   |4 +
> >>  5 files changed, 284 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >> index 6cc2c3b..d61905b 100644
> >> --- a/hw/ppc/spapr.c
> >> +++ b/hw/ppc/spapr.c
> >> @@ -2908,6 +2908,19 @@ static void spapr_machine_init(MachineState 
> >> *machine)
> >>  error_report("Could not get size of LPAR rtas '%s'", filename);
> >>  exit(1);
> >>  }
> >> +
> >> +if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
> >> +/*
> >> + * Ensure that the rtas image size is less than 
> >> RTAS_ERROR_LOG_OFFSET
> >> + * or else the rtas image will be overwritten with the rtas error 
> >> log
> >> + * when a machine check exception is encountered.
> >> + */
> >> +g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);
> >> +
> >> +/* Resize rtas blob to accommodate error log */
> >> +spapr->rtas_size = RTAS_ERROR_LOG_MAX;
> >> +}
> >> +
> >>  spapr->rtas_blob = g_malloc(spapr->rtas_size);
> >>  if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 
> >> 0) {
> >>  error_report("Could not load LPAR rtas '%s'", filename);
> >> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> >> index a0c66d7..51c052e 100644
> >> --- a/hw/ppc/spapr_events.c
> >> +++ b/hw/ppc/spapr_events.c
> >> @@ -212,6 +212,106 @@ struct hp_extended_log {
> >>  struct rtas_event_log_v6_hp hp;
> >>  } QEMU_PACKED;
> >>  
> >> +struct rtas_event_log_v6_mc {
> >> +#define RTAS_LOG_V6_SECTION_ID_MC   0x4D43 /* MC */
> >> +struct rtas_event_log_v6_section_header hdr;
> >> +uint32_t fru_id;
> >> +uint32_t proc_id;
> >> +uint8_t error_type;
> >> +#define RTAS_LOG_V6_MC_TYPE_UE   0
> >> +#define RTAS_LOG_V6_MC_TYPE_SLB  1
> >> +#define RTAS_LOG_V6_MC_TYPE_ERAT 2
> >> +#define RTAS_LOG_V6_MC_TYPE_TLB  4
> >> +#define RTAS_LOG_V6_MC_TYPE_D_CACHE  5
> >> +#define RTAS_LOG_V6_MC_TYPE_I_CACHE  7
> >> +uint8_t sub_err_type;
> >> +#define RTAS_LOG_V6_MC_UE_INDETERMINATE  0
> >> +#define RTAS_LOG_V6_MC_UE_IFETCH 1
> >> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2
> >> +#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3
> >> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4
> >> +#define RTAS_LOG_V6_MC_SLB_PARITY0
> >> +#define RTAS_LOG_V6_MC_SLB_MULTIHIT  1
> >> +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2
> >> +#define RTAS_LOG_V6_MC_ERAT_PARITY   1
> >> +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2
> >> +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE3
> >> +#define RTAS_LOG_V6_MC_TLB_PARITY1
> >> +#define RTAS_LOG_V6_MC_TLB_MULTIHIT  2
> >> +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3
> >> +uint8_t reserved_1[6];
> >> +uint64_t effective_address;
> >> +uint64_t logical_address;
> >> +} QEMU_PACKED;
> >> +
> >> +struct mc_extended_log {
> >> +struct rtas_event_log_v6 v6hdr;
> >> +struct rtas_event_log_v6_mc mc;
> >> +} QEMU_PACKED;
> >> +
> >> +struct MC_ierror_table {
> >> +unsigned long srr1_mask;
> >> +unsigned long srr1_value;
> >> +bool nip_valid; /* nip is a valid indicator of faulting address */
> >> +uint8_t error_type;
> >> +uint8_t error_subtype;
> >> +unsigned int initiator;
> >> +unsigned int severity;
> >> +};
> >> +
> >> +static const struct MC_ierror_table mc_ierror_table[] = {
> >> +{ 0x081c, 0x0004, true,
> >> +  RTAS_LOG_V6_M

Re: [Qemu-devel] [PATCH v10 4/6] target/ppc: Build rtas error log upon an MCE

2019-07-02 Thread Aravinda Prasad



On Tuesday 02 July 2019 09:33 AM, David Gibson wrote:
> On Wed, Jun 12, 2019 at 02:51:21PM +0530, Aravinda Prasad wrote:
>> Upon a machine check exception (MCE) in a guest address space,
>> KVM causes a guest exit to enable QEMU to build and pass the
>> error to the guest in the PAPR defined rtas error log format.
>>
>> This patch builds the rtas error log, copies it to the rtas_addr
>> and then invokes the guest registered machine check handler. The
>> handler in the guest takes suitable action(s) depending on the type
>> and criticality of the error. For example, if an error is
>> unrecoverable memory corruption in an application inside the
>> guest, then the guest kernel sends a SIGBUS to the application.
>> For recoverable errors, the guest performs recovery actions and
>> logs the error.
>>
>> Signed-off-by: Aravinda Prasad 
>> ---
>>  hw/ppc/spapr.c |   13 +++
>>  hw/ppc/spapr_events.c  |  238 
>> 
>>  hw/ppc/spapr_rtas.c|   26 +
>>  include/hw/ppc/spapr.h |6 +
>>  target/ppc/kvm.c   |4 +
>>  5 files changed, 284 insertions(+), 3 deletions(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 6cc2c3b..d61905b 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -2908,6 +2908,19 @@ static void spapr_machine_init(MachineState *machine)
>>  error_report("Could not get size of LPAR rtas '%s'", filename);
>>  exit(1);
>>  }
>> +
>> +if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
>> +/*
>> + * Ensure that the rtas image size is less than 
>> RTAS_ERROR_LOG_OFFSET
>> + * or else the rtas image will be overwritten with the rtas error 
>> log
>> + * when a machine check exception is encountered.
>> + */
>> +g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);
>> +
>> +/* Resize rtas blob to accommodate error log */
>> +spapr->rtas_size = RTAS_ERROR_LOG_MAX;
>> +}
>> +
>>  spapr->rtas_blob = g_malloc(spapr->rtas_size);
>>  if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
>>  error_report("Could not load LPAR rtas '%s'", filename);
>> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
>> index a0c66d7..51c052e 100644
>> --- a/hw/ppc/spapr_events.c
>> +++ b/hw/ppc/spapr_events.c
>> @@ -212,6 +212,106 @@ struct hp_extended_log {
>>  struct rtas_event_log_v6_hp hp;
>>  } QEMU_PACKED;
>>  
>> +struct rtas_event_log_v6_mc {
>> +#define RTAS_LOG_V6_SECTION_ID_MC   0x4D43 /* MC */
>> +struct rtas_event_log_v6_section_header hdr;
>> +uint32_t fru_id;
>> +uint32_t proc_id;
>> +uint8_t error_type;
>> +#define RTAS_LOG_V6_MC_TYPE_UE   0
>> +#define RTAS_LOG_V6_MC_TYPE_SLB  1
>> +#define RTAS_LOG_V6_MC_TYPE_ERAT 2
>> +#define RTAS_LOG_V6_MC_TYPE_TLB  4
>> +#define RTAS_LOG_V6_MC_TYPE_D_CACHE  5
>> +#define RTAS_LOG_V6_MC_TYPE_I_CACHE  7
>> +uint8_t sub_err_type;
>> +#define RTAS_LOG_V6_MC_UE_INDETERMINATE  0
>> +#define RTAS_LOG_V6_MC_UE_IFETCH 1
>> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2
>> +#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3
>> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4
>> +#define RTAS_LOG_V6_MC_SLB_PARITY0
>> +#define RTAS_LOG_V6_MC_SLB_MULTIHIT  1
>> +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2
>> +#define RTAS_LOG_V6_MC_ERAT_PARITY   1
>> +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2
>> +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE3
>> +#define RTAS_LOG_V6_MC_TLB_PARITY1
>> +#define RTAS_LOG_V6_MC_TLB_MULTIHIT  2
>> +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3
>> +uint8_t reserved_1[6];
>> +uint64_t effective_address;
>> +uint64_t logical_address;
>> +} QEMU_PACKED;
>> +
>> +struct mc_extended_log {
>> +struct rtas_event_log_v6 v6hdr;
>> +struct rtas_event_log_v6_mc mc;
>> +} QEMU_PACKED;
>> +
>> +struct MC_ierror_table {
>> +unsigned long srr1_mask;
>> +unsigned long srr1_value;
>> +bool nip_valid; /* nip is a valid indicator of faulting address */
>> +uint8_t error_type;
>> +uint8_t error_subtype;
>> +unsigned int initiator;
>> +unsigned int severity;
>> +};
>> +
>> +static const struct MC_ierror_table mc_ierror_table[] = {
>> +{ 0x081c, 0x0004, true,
>> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>> +{ 0x081c, 0x0008, true,
>> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>> +{ 0x081c,

Re: [Qemu-devel] [PATCH v10 4/6] target/ppc: Build rtas error log upon an MCE

2019-07-01 Thread David Gibson
On Wed, Jun 12, 2019 at 02:51:21PM +0530, Aravinda Prasad wrote:
> Upon a machine check exception (MCE) in a guest address space,
> KVM causes a guest exit to enable QEMU to build and pass the
> error to the guest in the PAPR defined rtas error log format.
> 
> This patch builds the rtas error log, copies it to the rtas_addr
> and then invokes the guest registered machine check handler. The
> handler in the guest takes suitable action(s) depending on the type
> and criticality of the error. For example, if an error is
> unrecoverable memory corruption in an application inside the
> guest, then the guest kernel sends a SIGBUS to the application.
> For recoverable errors, the guest performs recovery actions and
> logs the error.
> 
> Signed-off-by: Aravinda Prasad 
> ---
>  hw/ppc/spapr.c |   13 +++
>  hw/ppc/spapr_events.c  |  238 
> 
>  hw/ppc/spapr_rtas.c|   26 +
>  include/hw/ppc/spapr.h |6 +
>  target/ppc/kvm.c   |4 +
>  5 files changed, 284 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 6cc2c3b..d61905b 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2908,6 +2908,19 @@ static void spapr_machine_init(MachineState *machine)
>  error_report("Could not get size of LPAR rtas '%s'", filename);
>  exit(1);
>  }
> +
> +if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
> +/*
> + * Ensure that the rtas image size is less than RTAS_ERROR_LOG_OFFSET
> + * or else the rtas image will be overwritten with the rtas error log
> + * when a machine check exception is encountered.
> + */
> +g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);
> +
> +/* Resize rtas blob to accommodate error log */
> +spapr->rtas_size = RTAS_ERROR_LOG_MAX;
> +}
> +
>  spapr->rtas_blob = g_malloc(spapr->rtas_size);
>  if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
>  error_report("Could not load LPAR rtas '%s'", filename);
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index a0c66d7..51c052e 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -212,6 +212,106 @@ struct hp_extended_log {
>  struct rtas_event_log_v6_hp hp;
>  } QEMU_PACKED;
>  
> +struct rtas_event_log_v6_mc {
> +#define RTAS_LOG_V6_SECTION_ID_MC   0x4D43 /* MC */
> +struct rtas_event_log_v6_section_header hdr;
> +uint32_t fru_id;
> +uint32_t proc_id;
> +uint8_t error_type;
> +#define RTAS_LOG_V6_MC_TYPE_UE   0
> +#define RTAS_LOG_V6_MC_TYPE_SLB  1
> +#define RTAS_LOG_V6_MC_TYPE_ERAT 2
> +#define RTAS_LOG_V6_MC_TYPE_TLB  4
> +#define RTAS_LOG_V6_MC_TYPE_D_CACHE  5
> +#define RTAS_LOG_V6_MC_TYPE_I_CACHE  7
> +uint8_t sub_err_type;
> +#define RTAS_LOG_V6_MC_UE_INDETERMINATE  0
> +#define RTAS_LOG_V6_MC_UE_IFETCH 1
> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2
> +#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3
> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4
> +#define RTAS_LOG_V6_MC_SLB_PARITY0
> +#define RTAS_LOG_V6_MC_SLB_MULTIHIT  1
> +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2
> +#define RTAS_LOG_V6_MC_ERAT_PARITY   1
> +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2
> +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE3
> +#define RTAS_LOG_V6_MC_TLB_PARITY1
> +#define RTAS_LOG_V6_MC_TLB_MULTIHIT  2
> +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3
> +uint8_t reserved_1[6];
> +uint64_t effective_address;
> +uint64_t logical_address;
> +} QEMU_PACKED;
> +
> +struct mc_extended_log {
> +struct rtas_event_log_v6 v6hdr;
> +struct rtas_event_log_v6_mc mc;
> +} QEMU_PACKED;
> +
> +struct MC_ierror_table {
> +unsigned long srr1_mask;
> +unsigned long srr1_value;
> +bool nip_valid; /* nip is a valid indicator of faulting address */
> +uint8_t error_type;
> +uint8_t error_subtype;
> +unsigned int initiator;
> +unsigned int severity;
> +};
> +
> +static const struct MC_ierror_table mc_ierror_table[] = {
> +{ 0x081c, 0x0004, true,
> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> +{ 0x081c, 0x0008, true,
> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> +{ 0x081c, 0x000c, true,
> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> +{ 0x081c, 0

[Qemu-devel] [PATCH v10 4/6] target/ppc: Build rtas error log upon an MCE

2019-06-12 Thread Aravinda Prasad
Upon a machine check exception (MCE) in a guest address space,
KVM causes a guest exit to enable QEMU to build an error log and
pass it to the guest in the PAPR-defined RTAS error log format.

This patch builds the RTAS error log, copies it to rtas_addr,
and then invokes the guest-registered machine check handler. The
handler in the guest takes suitable action(s) depending on the type
and criticality of the error. For example, if the error is an
unrecoverable memory corruption in an application inside the
guest, then the guest kernel sends a SIGBUS to the application.
For recoverable errors, the guest performs recovery actions and
logs the error.

Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr.c |   13 +++
 hw/ppc/spapr_events.c  |  238 
 hw/ppc/spapr_rtas.c|   26 +
 include/hw/ppc/spapr.h |6 +
 target/ppc/kvm.c   |4 +
 5 files changed, 284 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 6cc2c3b..d61905b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2908,6 +2908,19 @@ static void spapr_machine_init(MachineState *machine)
 error_report("Could not get size of LPAR rtas '%s'", filename);
 exit(1);
 }
+
+if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
+/*
+ * Ensure that the rtas image size is less than RTAS_ERROR_LOG_OFFSET
+ * or else the rtas image will be overwritten with the rtas error log
+ * when a machine check exception is encountered.
+ */
+g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);
+
+/* Resize rtas blob to accommodate error log */
+spapr->rtas_size = RTAS_ERROR_LOG_MAX;
+}
+
 spapr->rtas_blob = g_malloc(spapr->rtas_size);
 if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
 error_report("Could not load LPAR rtas '%s'", filename);
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index a0c66d7..51c052e 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -212,6 +212,106 @@ struct hp_extended_log {
 struct rtas_event_log_v6_hp hp;
 } QEMU_PACKED;
 
+struct rtas_event_log_v6_mc {
+#define RTAS_LOG_V6_SECTION_ID_MC   0x4D43 /* MC */
+struct rtas_event_log_v6_section_header hdr;
+uint32_t fru_id;
+uint32_t proc_id;
+uint8_t error_type;
+#define RTAS_LOG_V6_MC_TYPE_UE   0
+#define RTAS_LOG_V6_MC_TYPE_SLB  1
+#define RTAS_LOG_V6_MC_TYPE_ERAT 2
+#define RTAS_LOG_V6_MC_TYPE_TLB  4
+#define RTAS_LOG_V6_MC_TYPE_D_CACHE  5
+#define RTAS_LOG_V6_MC_TYPE_I_CACHE  7
+uint8_t sub_err_type;
+#define RTAS_LOG_V6_MC_UE_INDETERMINATE  0
+#define RTAS_LOG_V6_MC_UE_IFETCH 1
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2
+#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+#define RTAS_LOG_V6_MC_SLB_PARITY0
+#define RTAS_LOG_V6_MC_SLB_MULTIHIT  1
+#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2
+#define RTAS_LOG_V6_MC_ERAT_PARITY   1
+#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2
+#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE3
+#define RTAS_LOG_V6_MC_TLB_PARITY1
+#define RTAS_LOG_V6_MC_TLB_MULTIHIT  2
+#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3
+uint8_t reserved_1[6];
+uint64_t effective_address;
+uint64_t logical_address;
+} QEMU_PACKED;
+
+struct mc_extended_log {
+struct rtas_event_log_v6 v6hdr;
+struct rtas_event_log_v6_mc mc;
+} QEMU_PACKED;
+
+struct MC_ierror_table {
+unsigned long srr1_mask;
+unsigned long srr1_value;
+bool nip_valid; /* nip is a valid indicator of faulting address */
+uint8_t error_type;
+uint8_t error_subtype;
+unsigned int initiator;
+unsigned int severity;
+};
+
+static const struct MC_ierror_table mc_ierror_table[] = {
+{ 0x081c, 0x0004, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0008, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x000c, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0010, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0014, true,
+  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERI