On Thursday 06 June 2019 11:36 AM, Greg Kurz wrote:
> On Thu, 6 Jun 2019 13:06:14 +1000
> David Gibson <da...@gibson.dropbear.id.au> wrote:
>
>> On Wed, May 29, 2019 at 11:10:57AM +0530, Aravinda Prasad wrote:
>>> This patch includes migration support for machine check
>>> handling. Especially this patch blocks VM migration
>>> requests until the machine check error handling is
>>> complete as (i) these errors are specific to the source
>>> hardware and are irrelevant on the target hardware,
>>> (ii) these errors cause data corruption and should
>>> be handled before migration.
>>>
>>> Signed-off-by: Aravinda Prasad <aravi...@linux.vnet.ibm.com>
>>> ---
>>> hw/ppc/spapr.c | 20 ++++++++++++++++++++
>>> hw/ppc/spapr_events.c | 17 +++++++++++++++++
>>> hw/ppc/spapr_rtas.c | 4 ++++
>>> include/hw/ppc/spapr.h | 2 ++
>>> 4 files changed, 43 insertions(+)
>>>
>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>> index e8a77636..31c4850 100644
>>> --- a/hw/ppc/spapr.c
>>> +++ b/hw/ppc/spapr.c
>>> @@ -2104,6 +2104,25 @@ static const VMStateDescription vmstate_spapr_dtb = {
>>> },
>>> };
>>>
>>> +static bool spapr_fwnmi_needed(void *opaque)
>>> +{
>>> + SpaprMachineState *spapr = (SpaprMachineState *)opaque;
>>> +
>>> + return (spapr->guest_machine_check_addr == -1) ? 0 : 1;
>>
>> Since we're introducing a PAPR capability to enable this, it would
>> actually be better to check that here, rather than the runtime state.
>> That leads to less cases and easier to understand semantics for the
>> migration stream.
>>
>
> Hmmm... the purpose of needed() VMState callbacks is precisely about
> runtime state: the subsection should only be migrated if an MCE is
> pending, ie. spapr->guest_machine_check_addr != -1.
spapr->guest_machine_check_addr is set when fwnmi is registered. Hence
an MCE might not be pending even if it is set.
I am fine with both the approaches (checking for
guest_machine_check_addr or for SPAPR_CAP_FWNMI_MCE).
Regards,
Aravinda
>
>>> +}
>>> +
>>> +static const VMStateDescription vmstate_spapr_machine_check = {
>>> + .name = "spapr_machine_check",
>>> + .version_id = 1,
>>> + .minimum_version_id = 1,
>>> + .needed = spapr_fwnmi_needed,
>>> + .fields = (VMStateField[]) {
>>> + VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
>>> + VMSTATE_INT32(mc_status, SpaprMachineState),
>>> + VMSTATE_END_OF_LIST()
>>> + },
>>> +};
>>> +
>>> static const VMStateDescription vmstate_spapr = {
>>> .name = "spapr",
>>> .version_id = 3,
>>> @@ -2137,6 +2156,7 @@ static const VMStateDescription vmstate_spapr = {
>>> &vmstate_spapr_dtb,
>>> &vmstate_spapr_cap_large_decr,
>>> &vmstate_spapr_cap_ccf_assist,
>>> + &vmstate_spapr_machine_check,
>>> NULL
>>> }
>>> };
>>> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
>>> index 573c0b7..35e21e4 100644
>>> --- a/hw/ppc/spapr_events.c
>>> +++ b/hw/ppc/spapr_events.c
>>> @@ -41,6 +41,7 @@
>>> #include "qemu/bcd.h"
>>> #include "hw/ppc/spapr_ovec.h"
>>> #include <libfdt.h>
>>> +#include "migration/blocker.h"
>>>
>>> #define RTAS_LOG_VERSION_MASK 0xff000000
>>> #define RTAS_LOG_VERSION_6 0x06000000
>>> @@ -855,6 +856,22 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu,
>>> bool recovered)
>>> void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>>> {
>>> SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>>> + int ret;
>>> + Error *local_err = NULL;
>>> +
>>> + error_setg(&spapr->fwnmi_migration_blocker,
>>> + "Live migration not supported during machine check handling");
>>> + ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err);
>>> + if (ret < 0) {
>>> + /*
>>> + * We don't want to abort and let the migration to continue. In a
>>> + * rare case, the machine check handler will run on the target
>>> + * hardware. Though this is not preferable, it is better than
>>> aborting
>>> + * the migration or killing the VM.
>>> + */
>>> + error_free(spapr->fwnmi_migration_blocker);
>>
>> You should set fwnmi_migration_blocker to NULL here as well.
>>
>> As mentioned on an earlier iteration, the migration blocker is the
>> same every time. Couldn't you just create it once and free at final
>> teardown, rather than recreating it for every NMI?
>>
>>> + warn_report_err(local_err);
>>> + }
>>>
>>> while (spapr->mc_status != -1) {
>>> /*
>>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>>> index 91a7ab9..c849223 100644
>>> --- a/hw/ppc/spapr_rtas.c
>>> +++ b/hw/ppc/spapr_rtas.c
>>> @@ -50,6 +50,7 @@
>>> #include "target/ppc/mmu-hash64.h"
>>> #include "target/ppc/mmu-book3s-v3.h"
>>> #include "kvm_ppc.h"
>>> +#include "migration/blocker.h"
>>>
>>> static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState
>>> *spapr,
>>> uint32_t token, uint32_t nargs,
>>> @@ -404,6 +405,9 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>>> spapr->mc_status = -1;
>>> qemu_cond_signal(&spapr->mc_delivery_cond);
>>> rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>>> + migrate_del_blocker(spapr->fwnmi_migration_blocker);
>>> + error_free(spapr->fwnmi_migration_blocker);
>>> + spapr->fwnmi_migration_blocker = NULL;
>>> }
>>> }
>>>
>>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>>> index bd75d4b..6c0cfd8 100644
>>> --- a/include/hw/ppc/spapr.h
>>> +++ b/include/hw/ppc/spapr.h
>>> @@ -214,6 +214,8 @@ struct SpaprMachineState {
>>> SpaprCapabilities def, eff, mig;
>>>
>>> unsigned gpu_numa_id;
>>> +
>>> + Error *fwnmi_migration_blocker;
>>> };
>>>
>>> #define H_SUCCESS 0
>>>
>>
>
--
Regards,
Aravinda