Hi Alex,
On 2/9/26 4:08 PM, Alex Bennée wrote:
> Eric Auger <[email protected]> writes:
>
>> Currently when the number of KVM registers exposed by the source is
>> larger than the one exposed on the destination, the migration fails
>> with: "failed to load cpu:cpreg_vmstate_array_len"
>>
>> This gives no information about which registers are causing the trouble.
>>
>> This patch reworks the target/arm/machine code so that it becomes
>> able to handle an input stream with a larger set of registers than
>> the destination and print useful information about which registers
>> are causing the trouble. The migration outcome is unchanged:
>> - unexpected registers will still fail the migration
>> - missing ones are printed but will not fail the migration, as done today.
>>
>> The input stream can contain up to MAX_CPREG_VMSTATE_ANOMALIES (10) extra
>> registers compared to what exists on the destination.
>>
>> If there are more extra registers than that, we will still hit the previous
>> "failed to load cpu:cpreg_vmstate_array_len" error.
>>
>> At most, MAX_CPREG_VMSTATE_ANOMALIES missing registers
>> and MAX_CPREG_VMSTATE_ANOMALIES unexpected registers are printed.
>>
>> Example:
>>
>> qemu-system-aarch64: kvm_arm_cpu_post_load Missing register in input stream:
>> 0 0x6030000000160003 fw feat reg 3
>> qemu-system-aarch64: kvm_arm_cpu_post_load Unexpected register in input
>> stream: 0 0x603000000013c103 op0:3 op1:0 crn:2 crm:0 op2:3
>> qemu-system-aarch64: kvm_arm_cpu_post_load Unexpected register in input
>> stream: 1 0x603000000013c512 op0:3 op1:0 crn:10 crm:2 op2:2
>> qemu-system-aarch64: kvm_arm_cpu_post_load Unexpected register in input
>> stream: 2 0x603000000013c513 op0:3 op1:0 crn:10 crm:2 op2:3
>> qemu-system-aarch64: error while loading state for instance 0x0 of device
>> 'cpu'
>> qemu-system-aarch64: load of migration failed: Operation not permitted
>>
>> With TCG there is no user-friendly formatting of the faulting
>> register indexes, unlike with KVM. However, the 2 added trace points
>> help to identify the culprit indexes.
>>
>> Signed-off-by: Eric Auger <[email protected]>
>> Reviewed-by: Cornelia Huck <[email protected]>
>>
>> ---
>>
>> v2 -> v3:
>> - some extra typos (Connie)
>> - collected Connie's R-b
>>
>> v1 -> v2:
>> - fixed some typos in the commit msg
>> ---
>> target/arm/cpu.h | 6 +++++
>> target/arm/kvm.c | 23 ++++++++++++++++
>> target/arm/machine.c | 58 ++++++++++++++++++++++++++++++++++++-----
>> target/arm/trace-events | 7 +++++
>> 4 files changed, 88 insertions(+), 6 deletions(-)
>>
>> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
>> index 1eaf5a3fddf..e900ef7937b 100644
>> --- a/target/arm/cpu.h
>> +++ b/target/arm/cpu.h
>> @@ -939,6 +939,12 @@ struct ArchCPU {
>> uint64_t *cpreg_vmstate_values;
>> int32_t cpreg_vmstate_array_len;
>>
>> + #define MAX_CPREG_VMSTATE_ANOMALIES 10
>> + uint64_t cpreg_vmstate_missing_indexes[MAX_CPREG_VMSTATE_ANOMALIES];
>> + int32_t cpreg_vmstate_missing_indexes_array_len;
>> + uint64_t cpreg_vmstate_unexpected_indexes[MAX_CPREG_VMSTATE_ANOMALIES];
>> + int32_t cpreg_vmstate_unexpected_indexes_array_len;
>> +
> This seems a bit old school when we have GArray.
Thanks for jumping in. Agreed. If we manage to have a generic TCG/KVM
print_register_name(), I hope those fields can even be removed.
Thanks!
Eric
>
>> DynamicGDBFeatureInfo dyn_sysreg_feature;
>> DynamicGDBFeatureInfo dyn_svereg_feature;
>> DynamicGDBFeatureInfo dyn_smereg_feature;
>> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
>> index 48f853fff80..c6f0d0fc4e1 100644
>> --- a/target/arm/kvm.c
>> +++ b/target/arm/kvm.c
>> @@ -1024,6 +1024,29 @@ void kvm_arm_cpu_pre_save(ARMCPU *cpu)
>>
>> bool kvm_arm_cpu_post_load(ARMCPU *cpu)
>> {
>> + int i;
>> +
>> + for (i = 0; i < cpu->cpreg_vmstate_missing_indexes_array_len; i++) {
>> + gchar *name;
>> +
>> + name =
>> kvm_print_register_name(cpu->cpreg_vmstate_missing_indexes[i]);
>> + trace_kvm_arm_cpu_post_load_missing_reg(name);
>> + g_free(name);
>> + }
>> +
>> + for (i = 0; i < cpu->cpreg_vmstate_unexpected_indexes_array_len; i++) {
>> + gchar *name;
>> +
>> + name =
>> kvm_print_register_name(cpu->cpreg_vmstate_unexpected_indexes[i]);
>> + error_report("%s Unexpected register in input stream: %i
>> 0x%"PRIx64" %s",
>> + __func__, i, cpu->cpreg_vmstate_unexpected_indexes[i],
>> name);
>> + g_free(name);
>> + }
>> + /* Fail the migration if we detect unexpected registers */
>> + if (cpu->cpreg_vmstate_unexpected_indexes_array_len) {
>> + return false;
>> + }
>> +
>> if (!write_list_to_kvmstate(cpu, KVM_PUT_FULL_STATE)) {
>> return false;
>> }
>> diff --git a/target/arm/machine.c b/target/arm/machine.c
>> index 0befdb0b28a..f06a920aba1 100644
>> --- a/target/arm/machine.c
>> +++ b/target/arm/machine.c
>> @@ -10,6 +10,7 @@
>> #include "migration/vmstate.h"
>> #include "target/arm/gtimer.h"
>> #include "hw/arm/machines-qom.h"
>> +#include "trace.h"
>>
>> static bool vfp_needed(void *opaque)
>> {
>> @@ -990,7 +991,13 @@ static int cpu_pre_load(void *opaque)
>> {
>> ARMCPU *cpu = opaque;
>> CPUARMState *env = &cpu->env;
>> + int arraylen = cpu->cpreg_vmstate_array_len +
>> MAX_CPREG_VMSTATE_ANOMALIES;
>>
>> + cpu->cpreg_vmstate_indexes = g_renew(uint64_t,
>> cpu->cpreg_vmstate_indexes,
>> + arraylen);
>> + cpu->cpreg_vmstate_values = g_renew(uint64_t, cpu->cpreg_vmstate_values,
>> + arraylen);
>> + cpu->cpreg_vmstate_array_len = arraylen;
> I wonder if these would be candidates for fixing up as well.
>
>> /*
>> * In an inbound migration where on the source FPSCR/FPSR/FPCR are 0,
>> * there will be no fpcr_fpsr subsection so we won't call vfp_set_fpcr()
>> @@ -1023,7 +1030,7 @@ static int cpu_post_load(void *opaque, int version_id)
>> {
>> ARMCPU *cpu = opaque;
>> CPUARMState *env = &cpu->env;
>> - int i, v;
>> + int i = 0, j = 0, k = 0, v = 0;
>>
>> /*
>> * Handle migration compatibility from old QEMU which didn't
>> @@ -1051,27 +1058,66 @@ static int cpu_post_load(void *opaque, int
>> version_id)
>> * entries with the right slots in our own values array.
>> */
>>
>> - for (i = 0, v = 0; i < cpu->cpreg_array_len
>> - && v < cpu->cpreg_vmstate_array_len; i++) {
>> + trace_cpu_post_load_len(cpu->cpreg_array_len,
>> cpu->cpreg_vmstate_array_len);
>> + for (; i < cpu->cpreg_array_len && v < cpu->cpreg_vmstate_array_len;) {
>> + trace_cpu_post_load(i, v , cpu->cpreg_indexes[i]);
>> if (cpu->cpreg_vmstate_indexes[v] > cpu->cpreg_indexes[i]) {
>> /* register in our list but not incoming : skip it */
>> + trace_cpu_post_load_missing(i, cpu->cpreg_indexes[i], v);
>> + if (j < MAX_CPREG_VMSTATE_ANOMALIES) {
>> + cpu->cpreg_vmstate_missing_indexes[j++] =
>> cpu->cpreg_indexes[i];
>> + }
>> + i++;
>> continue;
>> }
>> if (cpu->cpreg_vmstate_indexes[v] < cpu->cpreg_indexes[i]) {
>> - /* register in their list but not ours: fail migration */
>> - return -1;
>> + /* register in their list but not ours: those will fail
>> migration */
>> + trace_cpu_post_load_unexpected(v,
>> cpu->cpreg_vmstate_indexes[v], i);
>> + if (k < MAX_CPREG_VMSTATE_ANOMALIES) {
>> + cpu->cpreg_vmstate_unexpected_indexes[k++] =
>> + cpu->cpreg_vmstate_indexes[v];
>> + }
>> + v++;
>> + continue;
>> }
>> /* matching register, copy the value over */
>> cpu->cpreg_values[i] = cpu->cpreg_vmstate_values[v];
>> v++;
>> + i++;
>> }
>> + /*
>> + * if we have reached the end of the incoming array but there are
>> + * still regs in cpreg, continue parsing the regs which are missing
>> + * in the input stream
>> + */
>> + for ( ; i < cpu->cpreg_array_len; i++) {
>> + if (j < MAX_CPREG_VMSTATE_ANOMALIES) {
>> + trace_cpu_post_load_missing(i, cpu->cpreg_indexes[i], v);
>> + cpu->cpreg_vmstate_missing_indexes[j++] = cpu->cpreg_indexes[i];
>> + }
>> + }
>> + /*
>> + * if we have reached the end of the cpreg array but there are
>> + * still regs in the input stream, continue parsing the vmstate array
>> + */
>> + for ( ; v < cpu->cpreg_vmstate_array_len; v++) {
>> + if (k < MAX_CPREG_VMSTATE_ANOMALIES) {
>> + trace_cpu_post_load_unexpected(v,
>> cpu->cpreg_vmstate_indexes[v], i);
>> + cpu->cpreg_vmstate_unexpected_indexes[k++] =
>> + cpu->cpreg_vmstate_indexes[v];
>> + }
>> + }
>> +
>> + cpu->cpreg_vmstate_missing_indexes_array_len = j;
>> + cpu->cpreg_vmstate_unexpected_indexes_array_len = k;
>>
>> if (kvm_enabled()) {
>> if (!kvm_arm_cpu_post_load(cpu)) {
>> return -1;
>> }
>> } else {
>> - if (!write_list_to_cpustate(cpu)) {
>> + if (cpu->cpreg_vmstate_unexpected_indexes_array_len ||
>> + !write_list_to_cpustate(cpu)) {
>> return -1;
>> }
>> }
>> diff --git a/target/arm/trace-events b/target/arm/trace-events
>> index 676d29fe516..0a5ed3e69d5 100644
>> --- a/target/arm/trace-events
>> +++ b/target/arm/trace-events
>> @@ -13,6 +13,7 @@ arm_gt_update_irq(int timer, int irqstate) "gt_update_irq:
>> timer %d irqstate %d"
>>
>> # kvm.c
>> kvm_arm_fixup_msi_route(uint64_t iova, uint64_t gpa) "MSI iova =
>> 0x%"PRIx64" is translated into 0x%"PRIx64
>> +kvm_arm_cpu_post_load_missing_reg(char *name) "Missing register in input
>> stream: %s"
>>
>> # cpu.c
>> arm_cpu_reset(uint64_t mp_aff) "cpu %" PRIu64
>> @@ -26,3 +27,9 @@ arm_powerctl_reset_cpu(uint64_t mp_aff) "cpu %" PRIu64
>>
>> # tcg/psci.c and hvf/hvf.c
>> arm_psci_call(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint32_t
>> cpuid) "PSCI Call x0=0x%016"PRIx64" x1=0x%016"PRIx64" x2=0x%016"PRIx64"
>> x3=0x%016"PRIx64" cpuid=0x%x"
>> +
>> +# machine.c
>> +cpu_post_load_len(int cpreg_array_len, int cpreg_vmstate_array_len)
>> "cpreg_array_len=%d cpreg_vmstate_array_len=%d"
>> +cpu_post_load(int i, int v, uint64_t regidx) "i=%d v=%d regidx=0x%"PRIx64
>> +cpu_post_load_missing(int i, uint64_t regidx, int v) "missing register in
>> input stream: i=%d index=0x%"PRIx64" (v=%d)"
>> +cpu_post_load_unexpected(int v, uint64_t regidx, int i) "unexpected
>> register in input stream: v=%d index=0x%"PRIx64" (i=%d)"