On 3/26/19 5:25 AM, David Gibson wrote:
> On Fri, Mar 22, 2019 at 08:53:21AM +0100, Cédric Le Goater wrote:
>> On 3/22/19 2:00 AM, David Gibson wrote:
>>> On Thu, Mar 21, 2019 at 03:49:03PM +0100, Cédric Le Goater wrote:
>>>> This extends the KVM XIVE device backend with 'synchronize_state'
>>>> methods used to retrieve the state from KVM. The HW state of the
>>>> sources, the KVM device and the thread interrupt contexts are
>>>> collected for the monitor usage and also migration.
>>>>
>>>> These get operations rely on their KVM counterpart in the host kernel
>>>> which acts as a proxy for OPAL, the host firmware. The set operations
>>>> will be added for migration support later.
>>>>
>>>> Signed-off-by: Cédric Le Goater <c...@kaod.org>
>>>> ---
>>>>
>>>>  Changes since v2:
>>>>
>>>>  - removed the capture of the OS CAM line value from KVM
>>>>  - added xive_end_is_valid() check
>>>>
>>>>  include/hw/ppc/spapr_xive.h |  8 ++++
>>>>  include/hw/ppc/xive.h       |  1 +
>>>>  hw/intc/spapr_xive.c        | 17 +++++---
>>>>  hw/intc/spapr_xive_kvm.c    | 87 +++++++++++++++++++++++++++++++++++++
>>>>  hw/intc/xive.c              | 10 +++++
>>>>  5 files changed, 116 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
>>>> index 03685910e76b..7e49badd8c9a 100644
>>>> --- a/include/hw/ppc/spapr_xive.h
>>>> +++ b/include/hw/ppc/spapr_xive.h
>>>> @@ -44,6 +44,13 @@ typedef struct SpaprXive {
>>>>      void          *tm_mmap;
>>>>  } SpaprXive;
>>>>  
>>>> +/*
>>>> + * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
>>>> + * to the controller block id value. It can nevertheless be changed
>>>> + * for testing purpose.
>>>> + */
>>>> +#define SPAPR_XIVE_BLOCK_ID 0x0
>>>> +
>>>>  bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi);
>>>>  bool spapr_xive_irq_free(SpaprXive *xive, uint32_t lisn);
>>>>  void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon);
>>>> @@ -74,5 +81,6 @@ void kvmppc_xive_set_queue_config(SpaprXive *xive, 
>>>> uint8_t end_blk,
>>>>  void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
>>>>                                   uint32_t end_idx, XiveEND *end,
>>>>                                   Error **errp);
>>>> +void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp);
>>>>  
>>>>  #endif /* PPC_SPAPR_XIVE_H */
>>>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>>>> index dd115da30ebc..78c919c4a590 100644
>>>> --- a/include/hw/ppc/xive.h
>>>> +++ b/include/hw/ppc/xive.h
>>>> @@ -435,5 +435,6 @@ void kvmppc_xive_source_reset_one(XiveSource *xsrc, 
>>>> int srcno, Error **errp);
>>>>  void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
>>>>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
>>>>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
>>>> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
>>>>  
>>>>  #endif /* PPC_XIVE_H */
>>>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
>>>> index cde2fd7c8997..4d07140f1078 100644
>>>> --- a/hw/intc/spapr_xive.c
>>>> +++ b/hw/intc/spapr_xive.c
>>>> @@ -40,13 +40,6 @@
>>>>  
>>>>  #define SPAPR_XIVE_NVT_BASE 0x400
>>>>  
>>>> -/*
>>>> - * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
>>>> - * to the controller block id value. It can nevertheless be changed
>>>> - * for testing purpose.
>>>> - */
>>>> -#define SPAPR_XIVE_BLOCK_ID 0x0
>>>> -
>>>>  /*
>>>>   * sPAPR NVT and END indexing helpers
>>>>   */
>>>> @@ -156,6 +149,16 @@ void spapr_xive_pic_print_info(SpaprXive *xive, 
>>>> Monitor *mon)
>>>>      XiveSource *xsrc = &xive->source;
>>>>      int i;
>>>>  
>>>> +    if (kvm_irqchip_in_kernel()) {
>>>> +        Error *local_err = NULL;
>>>> +
>>>> +        kvmppc_xive_synchronize_state(xive, &local_err);
>>>> +        if (local_err) {
>>>> +            error_report_err(local_err);
>>>> +            return;
>>>> +        }
>>>> +    }
>>>> +
>>>>      monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
>>>>  
>>>>      for (i = 0; i < xive->nr_irqs; i++) {
>>>> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
>>>> index 2714f8e4702e..2e46661cb3ad 100644
>>>> --- a/hw/intc/spapr_xive_kvm.c
>>>> +++ b/hw/intc/spapr_xive_kvm.c
>>>> @@ -60,6 +60,51 @@ static void kvm_cpu_enable(CPUState *cs)
>>>>  /*
>>>>   * XIVE Thread Interrupt Management context (KVM)
>>>>   */
>>>> +static void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
>>>> +{
>>>> +    uint64_t state[2] = { 0 };
>>>> +    int ret;
>>>> +
>>>> +    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
>>>> +    if (ret != 0) {
>>>> +        error_setg_errno(errp, errno,
>>>> +                         "XIVE: could not capture KVM state of CPU %ld",
>>>> +                         kvm_arch_vcpu_id(tctx->cs));
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    /* word0 and word1 of the OS ring. */
>>>> +    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
>>>> +}
>>>> +
>>>> +typedef struct {
>>>> +    XiveTCTX *tctx;
>>>> +    Error *err;
>>>> +} XiveCpuGetState;
>>>> +
>>>> +static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
>>>> +                                                 run_on_cpu_data arg)
>>>> +{
>>>> +    XiveCpuGetState *s = arg.host_ptr;
>>>> +
>>>> +    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
>>>> +}
>>>> +
>>>> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
>>>> +{
>>>> +    XiveCpuGetState s = {
>>>> +        .tctx = tctx,
>>>> +        .err = NULL,
>>>> +    };
>>>> +
>>>> +    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
>>>> +               RUN_ON_CPU_HOST_PTR(&s));
>>>
>>> I think I brought this up earlier, but I'm still confused.
>>>
>>> Retreiving information with GET_ONE_REG doesn't usually need dancing
>>> around to run on a particular thread.  Why is it necessary here?
>>
>> The thread can be scheduled, so we need to kick it to get the thread
>> interrupt context registers. This is very much like XICS getting the 
>> ICP state.
>>
>> May be there is something I am not getting from your question ? I am
>> not an expert of the VCPU scheduling area either.
> 
> Ah, right I see.  We need to make sure the vcpu is not running so we
> don't get stale information.  Ok, I think I'll remember that for the
> next review round, should be ok as it is.

It's not only getting stale information, it can also result in a QEMU 
CPU stall.

I will add a comment for next around. 

Thanks,

C. 


Reply via email to