On 10.05.2013, at 19:31, Bhushan Bharat-R65777 wrote:

> 
> 
>> -----Original Message-----
>> From: Alexander Graf [mailto:ag...@suse.de]
>> Sent: Friday, May 10, 2013 3:48 PM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; k...@vger.kernel.org; Wood Scott-B07421;
>> tiejun.c...@windriver.com; Bhushan Bharat-R65777
>> Subject: Re: [PATCH] KVM: PPC: Add userspace debug stub support
>> 
>> 
>> On 07.05.2013, at 11:40, Bharat Bhushan wrote:
>> 
>>> This patch adds the debug stub support on booke/bookehv.
>>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
>>> breakpoint to debug guest.
>>> 
>>> This is how we save/restore debug register context when switching
>>> between guest, userspace and kernel user-process:
>>> 
>>> When QEMU is running
>>> -> thread->debug_reg == QEMU debug register context.
>>> -> Kernel will handle switching the debug register on context switch.
>>> -> no vcpu_load() called
>>> 
>>> QEMU makes ioctls (except RUN)
>>> -> This will call vcpu_load()
>>> -> should not change context.
>>> -> Some ioctls can change vcpu debug register, context saved in
>>> -> vcpu->debug_regs
>>> 
>>> QEMU Makes RUN ioctl
>>> -> Save thread->debug_reg on STACK
>>> -> Store thread->debug_reg == vcpu->debug_reg load thread->debug_reg
>>> -> RUN VCPU ( So thread points to vcpu context )
>>> 
>>> Context switch happens When VCPU running
>>> -> makes vcpu_load() should not load any context kernel loads the vcpu
>>> -> context as thread->debug_regs points to vcpu context.
>>> 
>>> On heavyweight_exit
>>> -> Load the context saved on stack in thread->debug_reg
>>> 
>>> Currently we do not support debug resource emulation to guest, On
>>> debug exception, always exit to user space irrespective of user space
>>> is expecting the debug exception or not. If this is unexpected
>>> exception (breakpoint/watchpoint event not set by
>>> userspace) then let us leave the action on user space. This is similar
>>> to what it was before, only thing is that now we have proper exit
>>> state available to user space.
>>> 
>>> Signed-off-by: Bharat Bhushan <bharat.bhus...@freescale.com>
>>> ---
>>> arch/powerpc/include/asm/kvm_host.h |    3 +
>>> arch/powerpc/include/uapi/asm/kvm.h |    1 +
>>> arch/powerpc/kvm/booke.c            |  242 
>>> ++++++++++++++++++++++++++++++++---
>>> arch/powerpc/kvm/booke.h            |    5 +
>>> 4 files changed, 233 insertions(+), 18 deletions(-)
>>> 
>>> diff --git a/arch/powerpc/include/asm/kvm_host.h
>>> b/arch/powerpc/include/asm/kvm_host.h
>>> index 838a577..1b29945 100644
>>> --- a/arch/powerpc/include/asm/kvm_host.h
>>> +++ b/arch/powerpc/include/asm/kvm_host.h
>>> @@ -524,7 +524,10 @@ struct kvm_vcpu_arch {
>>>     u32 eptcfg;
>>>     u32 epr;
>>>     u32 crit_save;
>>> +   /* guest debug registers*/
>>>     struct debug_reg dbg_reg;
>>> +   /* shadow debug registers */
>> 
>> Please be more verbose here. What exactly does this contain? Why do we need
>> shadow and non-shadow registers? The comment as it is reads like
>> 
>>  /* Add one plus one */
>>  x = 1 + 1;
> 
> 
> /*
> * Shadow debug registers hold the debug register content
> * to be written in h/w debug register on behalf of guest
> * written value or user space written value.
> */

/* hardware visible debug registers when in guest state */

> 
> 
>> 
>>> +   struct debug_reg shadow_dbg_reg;
>>> #endif
>>>     gpa_t paddr_accessed;
>>>     gva_t vaddr_accessed;
>>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h
>>> b/arch/powerpc/include/uapi/asm/kvm.h
>>> index ded0607..f5077c2 100644
>>> --- a/arch/powerpc/include/uapi/asm/kvm.h
>>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
>>> @@ -27,6 +27,7 @@
>>> #define __KVM_HAVE_PPC_SMT
>>> #define __KVM_HAVE_IRQCHIP
>>> #define __KVM_HAVE_IRQ_LINE
>>> +#define __KVM_HAVE_GUEST_DEBUG
>>> 
>>> struct kvm_regs {
>>>     __u64 pc;
>>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index
>>> ef99536..6a44ad4 100644
>>> --- a/arch/powerpc/kvm/booke.c
>>> +++ b/arch/powerpc/kvm/booke.c
>>> @@ -133,6 +133,29 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu
>>> *vcpu) #endif }
>>> 
>>> +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) {
>>> +   /* Synchronize guest's desire to get debug interrupts into shadow
>>> +MSR */ #ifndef CONFIG_KVM_BOOKE_HV
>>> +   vcpu->arch.shadow_msr &= ~MSR_DE;
>>> +   vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE; #endif
>>> +
>>> +   /* Force enable debug interrupts when user space wants to debug */
>>> +   if (vcpu->guest_debug) {
>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>> +           /*
>>> +            * Since there is no shadow MSR, sync MSR_DE into the guest
>>> +            * visible MSR.
>>> +            */
>>> +           vcpu->arch.shared->msr |= MSR_DE;
>>> +#else
>>> +           vcpu->arch.shadow_msr |= MSR_DE;
>>> +           vcpu->arch.shared->msr &= ~MSR_DE;
>>> +#endif
>>> +   }
>>> +}
>>> +
>>> /*
>>> * Helper function for "full" MSR writes.  No need to call this if
>>> only
>>> * EE/CE/ME/DE/RI are changing.
>>> @@ -150,6 +173,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
>>>     kvmppc_mmu_msr_notify(vcpu, old_msr);
>>>     kvmppc_vcpu_sync_spe(vcpu);
>>>     kvmppc_vcpu_sync_fpu(vcpu);
>>> +   kvmppc_vcpu_sync_debug(vcpu);
>>> }
>>> 
>>> static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, @@
>>> -655,6 +679,7 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
>>> int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) {
>>>     int ret, s;
>>> +   struct debug_reg debug;
>>> #ifdef CONFIG_PPC_FPU
>>>     unsigned int fpscr;
>>>     int fpexc_mode;
>>> @@ -699,11 +724,27 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct
>> kvm_vcpu *vcpu)
>>>     kvmppc_load_guest_fp(vcpu);
>>> #endif
>>> 
>>> +   /*
>>> +    * Clear current->thread.dbcr0 so that kernel does not
>>> +    * restore h/w registers on context switch in vcpu running state.
>>> +    */
>> 
>> Incorrect comment?
> 
> Leftover from previous code, will remove this.
> 
>> 
>>> +   /* Save thread->debug context on stack */
>> 
>> /* Switch to guest debug context */
>> 
>>> +   memcpy(&debug, &current->thread.debug, sizeof(struct debug_reg));
>> 
>> debug = current->thread.debug;
>> 
>>> +   /* Load vcpu debug context in thread->debug */
>>> +   memcpy(&current->thread.debug, &vcpu->arch.shadow_dbg_reg,
>>> +          sizeof(struct debug_reg));
>> 
>> current->thread.debug = vcpu->arch.shadow_dbg_reg;
>> 
>>> +
>>> +   switch_booke_debug_regs(&current->thread);
>>> +
>>>     ret = __kvmppc_vcpu_run(kvm_run, vcpu);
>>> 
>>>     /* No need for kvm_guest_exit. It's done in handle_exit.
>>>        We also get here with interrupts enabled. */
>>> 
>>> +   /* Restore userspace context in thread->dbcr0 from stack*/
>> 
>> /* Switch back to user space debug context */
>> 
>>> +   memcpy(&current->thread.debug, &debug, sizeof(struct debug_reg));
>> 
>> current->thread.debug = debug;
>> 
>>> +   switch_booke_debug_regs(&current->thread);
>>> +
>>> #ifdef CONFIG_PPC_FPU
>>>     kvmppc_save_guest_fp(vcpu);
>>> 
>>> @@ -759,6 +800,36 @@ static int emulation_exit(struct kvm_run *run, struct
>> kvm_vcpu *vcpu)
>>>     }
>>> }
>>> 
>>> +/*
>>> + * Currently we do not support debug resource emulation to guest,
>>> + * so always exit to user space irrespective of user space is
>>> + * expecting the debug exception or not. This is unexpected event
>>> + * and let us leave the action on user space.
>> 
>> Please rework your wording.
>> 
>>> + */
>>> +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu
>>> +*vcpu) {
>>> +   struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
>>> +   u32 dbsr = vcpu->arch.dbsr;
>>> +
>>> +   run->debug.arch.status = 0;
>>> +   run->debug.arch.address = vcpu->arch.pc;
>>> +
>>> +   if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
>>> +           run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
>>> +   } else {
>>> +           if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
>>> +                   run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
>>> +           else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
>>> +                   run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
>>> +           if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
>>> +                   run->debug.arch.address = dbg_reg->dac1;
>>> +           else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
>>> +                   run->debug.arch.address = dbg_reg->dac2;
>>> +   }
>>> +
>>> +   return RESUME_HOST;
>>> +}
>>> +
>>> static void kvmppc_fill_pt_regs(struct pt_regs *regs) {
>>>     ulong r1, ip, msr, lr;
>>> @@ -819,6 +890,11 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu
>> *vcpu,
>>>     case BOOKE_INTERRUPT_CRITICAL:
>>>             unknown_exception(&regs);
>>>             break;
>>> +   case BOOKE_INTERRUPT_DEBUG:
>>> +           /* Save DBSR before preemption is enabled */
>>> +           vcpu->arch.dbsr = mfspr(SPRN_DBSR);
>>> +           kvmppc_clear_dbsr();
>>> +           break;
>>>     }
>>> }
>>> 
>>> @@ -1118,18 +1194,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
>> kvm_vcpu *vcpu,
>>>     }
>>> 
>>>     case BOOKE_INTERRUPT_DEBUG: {
>>> -           u32 dbsr;
>>> -
>>> -           vcpu->arch.pc = mfspr(SPRN_CSRR0);
>>> -
>>> -           /* clear IAC events in DBSR register */
>>> -           dbsr = mfspr(SPRN_DBSR);
>>> -           dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
>>> -           mtspr(SPRN_DBSR, dbsr);
>>> -
>>> -           run->exit_reason = KVM_EXIT_DEBUG;
>>> +           r = kvmppc_handle_debug(run, vcpu);
>>> +           if (r == RESUME_HOST)
>>> +                   run->exit_reason = KVM_EXIT_DEBUG;
>>>             kvmppc_account_exit(vcpu, DEBUG_EXITS);
>>> -           r = RESUME_HOST;
>>>             break;
>>>     }
>>> 
>>> @@ -1180,7 +1248,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>>>     kvmppc_set_msr(vcpu, 0);
>>> 
>>> #ifndef CONFIG_KVM_BOOKE_HV
>>> -   vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
>>> +   vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
>>>     vcpu->arch.shadow_pid = 1;
>>>     vcpu->arch.shared->msr = 0;
>>> #endif
>>> @@ -1557,12 +1625,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
>> struct kvm_one_reg *reg)
>>>     return r;
>>> }
>>> 
>>> -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>>> -                                    struct kvm_guest_debug *dbg)
>>> -{
>>> -   return -EINVAL;
>>> -}
>>> -
>>> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu
>>> *fpu) {
>>>     return -ENOTSUPP;
>>> @@ -1668,6 +1730,145 @@ void kvmppc_decrementer_func(unsigned long data)
>>>     kvmppc_set_tsr_bits(vcpu, TSR_DIS);
>>> }
>>> 
>>> +static int kvmppc_booke_set_breakpoint(struct kvm_vcpu *vcpu,
>>> +uint64_t addr,
>> 
>> s/set/add/
>> 
>>> +                                  int index)
>>> +{
>>> +   struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
>>> +
>>> +   switch (index) {
>>> +   case 0:
>>> +           dbg_reg->dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
>>> +           dbg_reg->iac1 = addr;
>>> +           break;
>>> +   case 1:
>>> +           dbg_reg->dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
>>> +           dbg_reg->iac2 = addr;
>>> +           break;
>>> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
>>> +   case 2:
>>> +           dbg_reg->dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
>>> +           dbg_reg->iac3 = addr;
>>> +           break;
>>> +   case 3:
>>> +           dbg_reg->dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
>>> +           dbg_reg->iac4 = addr;
>>> +           break;
>>> +#endif
>>> +   default:
>>> +           return -EINVAL;
>>> +   }
>> 
>>    dbg_reg->dbcr0 |= DBCR0_IDM;
>> 
>>> +   return 0;
>>> +}
>>> +
>>> +static int kvmppc_booke_set_watchpoint(struct kvm_vcpu *vcpu,
>>> +uint64_t addr,
>> 
>> s/set/add/
>> 
>>> +                                  int type, int index)
>>> +{
>>> +   struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
>>> +
>>> +   switch (index) {
>>> +   case 0:
>>> +           if (type & KVMPPC_DEBUG_WATCH_READ)
>>> +                   dbg_reg->dbcr0 |= DBCR0_DAC1R | DBCR0_IDM;
>>> +           if (type & KVMPPC_DEBUG_WATCH_WRITE)
>>> +                   dbg_reg->dbcr0 |= DBCR0_DAC1W | DBCR0_IDM;
>>> +           dbg_reg->dac1 = addr;
>>> +           break;
>>> +   case 1:
>>> +           if (type & KVMPPC_DEBUG_WATCH_READ)
>>> +                   dbg_reg->dbcr0 |= DBCR0_DAC2R | DBCR0_IDM;
>>> +           if (type & KVMPPC_DEBUG_WATCH_WRITE)
>>> +                   dbg_reg->dbcr0 |= DBCR0_DAC2W | DBCR0_IDM;
>>> +           dbg_reg->dac2 = addr;
>>> +           break;
>>> +   default:
>>> +           return -EINVAL;
>>> +   }
>> 
>>    dbg_reg->dbcr0 |= DBCR0_IDM;
>> 
>>> +   return 0;
>>> +}
>>> +
>>> +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>>> +                                    struct kvm_guest_debug *dbg)
>>> +{
>>> +   struct debug_reg *dbg_reg;
>>> +   int n, b = 0, w = 0;
>>> +
>>> +   if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
>>> +           /* Clear All debug events */
>>> +           vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>> +           vcpu->guest_debug = 0;
>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>> +           /*
>>> +            * When user space is not using the debug resources
>>> +            * then allow guest to change the MSR.DE.
>>> +            */
>>> +           vcpu->arch.shadow_msrp &= ~MSRP_DEP; #endif
>> 
>> guest_may_set_msr_de(vcpu, true);
>> 
>>> +           return 0;
>>> +   }
>>> +
>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>> +   /*
>>> +    * When user space is using the debug resource then
>>> +    * do not allow guest to change the MSR.DE.
>>> +    */
>>> +   vcpu->arch.shadow_msrp |= MSRP_DEP;
>>> +#endif
>> 
>> guest_may_set_msr_de(vcpu, false);
>> 
>>> +   vcpu->guest_debug = dbg->control;
>>> +   vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>> +   /* Set DBCR0_EDM in guest visible DBCR0 register. */
>>> +   vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
>>> +
>>> +   if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
>>> +           vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
>>> +
>>> +   if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
>>> +           return 0;
>>> +
>>> +   /* Code below handles only HW breakpoints */
>>> +   dbg_reg = &(vcpu->arch.shadow_dbg_reg);
>>> +
>>> +   /*
>>> +    * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
>>> +    * to occur when MSR.PR is set.
>>> +    * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
>>> +    * should clear DBCR1 and DBCR2. And EPCR.DUVD is used to control
>>> +    * that debug events will not come in hypervisor (GS = 0).
> 
> Will rework the above comment as discussed.
> 
>> 
>> This is still wrong. We want to trap in PR=1. It's what PR KVM does, it's 
>> what
>> TCG does. There is no point in making HV KVM behave differently.
> 
> This comment is not valid.
> 
>> 
>>> +    */
>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>> +   dbg_reg->dbcr1 = 0;
>>> +   dbg_reg->dbcr2 = 0;
>>> +#else
>>> +   dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
>>> +                     DBCR1_IAC4US;
>>> +   dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; #endif
>>> +
>>> +   for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
>>> +           uint64_t addr = dbg->arch.bp[n].addr;
>>> +           uint32_t type = dbg->arch.bp[n].type;
>>> +
>>> +           if (type == KVMPPC_DEBUG_NONE)
>>> +                   continue;
>>> +
>>> +           if (type & !(KVMPPC_DEBUG_WATCH_READ |
>>> +                        KVMPPC_DEBUG_WATCH_WRITE |
>>> +                        KVMPPC_DEBUG_BREAKPOINT))
>>> +                   return -EINVAL;
>>> +
>>> +           if (type & KVMPPC_DEBUG_BREAKPOINT) {
>>> +                   /* Setting H/W breakpoint */
>>> +                   if (kvmppc_booke_set_breakpoint(vcpu, addr, b++))
>> 
>> Please pass dbg_reg into the function
>> 
>>> +                           return -EINVAL;
>>> +           } else {
>>> +                   /* Setting H/W watchpoint */
>>> +                   if (kvmppc_booke_set_watchpoint(vcpu, addr, type, w++))
>> 
>> here too
>> 
>>> +                           return -EINVAL;
>>> +           }
>>> +   }
>>> +
>>> +   return 0;
>>> +}
>>> +
>>> void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) {
>>>     vcpu->cpu = smp_processor_id();
>>> @@ -1678,6 +1879,11 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu
>>> *vcpu) {
>>>     current->thread.kvm_vcpu = NULL;
>>>     vcpu->cpu = -1;
>>> +
>>> +   /* Disable all debug events */
>>> +   mtspr(SPRN_DBCR0, 0x0);
>> 
>> Why? Wouldn't normal preemption handling take care of this already?
> 
> Yes, normal preemption will take care. On vcpu_put we do not clear EPCR.DUVD 
> and DBCR1/2.
> So debug event will not be taken in host.
> 
> So yes, it is not needed here.
> 
>> 
>>> +   /* Clear pending debug event in DBSR */
>>> +   kvmppc_clear_dbsr();
>> 
>> Is there a chance we will ever have to do this? On debug exits from guest
>> context we already clear dbsr.
> 
> DBSR can have events captured but not delivered as interrupt if MSR.DE is 
> clear. I know with qemu debug stub we do not allow msr.de to be cleared by 
> guest.

I see. In that case we'd have to save the original DBSR value into vcpu->arch 
though, no? Otherwise we might lose guest debug events on preemption if we 
simply clear it.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to