> >>>>> -static int emulation_exit(struct kvm_run *run, struct kvm_vcpu
> >>>>> *vcpu)
> >>>>> +static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> >>>>> +                         int exit_nr)
> >>>>> {
> >>>>>         enum emulation_result er;
> >>>>>
> >>>>> +       if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) &&
> >>>>> +                    vcpu->arch.last_inst == KVMPPC_INST_GUEST_GDB) {
> >>>>
> >>>> This belongs into the normal emulation code path, behind the same
> >>>> switch() that everything else goes through.
> >>>
> >>> I am not sure I understood correctly. Below is the reason why I
> >>> placed this
> >> code here.
> >>> Instruction where software breakpoint is to be set is replaced by "ehpriv"
> >> instruction. On e500v2, this is not a valid instruction can causes
> >> program interrupt. On e500mc, "ehpriv" is a valid instruction. Both
> >> the exit path calls emulation_exit(), so we have placed the code in this
> function.
> >>> Do you want this code to be moved in program interrupt exit path for
> >>> e500v2
> >> and BOOKE_INTERRUPT_HV_PRIV for e500mc?
> >>
> >> Ok, in this patch you do (basically):
> >>
> >> int emulation_exit()
> >> {
> >>    if (inst == DEBUG_INST) {
> >>        debug_stuff();
> >>        return;
> >>    }
> >>
> >>    switch (inst) {
> >>        case INST_A:
> >>            foo();
> >>        ....
> >>    }
> >> }
> >
> > Are not we doing something like this:
> > int emulation_exit()
> > {
> >     if (inst == DEBUG_INST) {
> >         debug_stuff();
> >         return;
> >     }
> >
> >     status = kvmppc_emulate_instruction()
> >     switch (status) {
> >         case FAIL:
> >             foo();
> >         case DONE:
> >             foo1();
> >         ....
> >     }
> > }
> >
> > Do you want something like this:
> >
> > int emulation_exit()
> > {
> >
> >     status = kvmppc_emulate_instruction()
> >     switch (status) {
> >         case FAIL:
> >                     if (inst == DEBUG_INST) {
> >                     debug_stuff();
> >                   return;
> >                     }
> >             foo();
> >
> >         case DONE:
> >             foo1();
> >         ....
> >     }
> > }
> 
> No, I want the DEBUG_INST be handled the same as any other instruction we
> emulate.

I would like to understand what you are thinking:
What I derived is , add the instruction in kvmppc_emulate_instruction() (or its 
child function) which, 
1) fill the relevant information in run-> , kvmppc_account_exit(vcpu, 
DEBUG_EXITS); and returns EMULATION_DONE
 And in emulation_exit()
     status = kvmppc_emulate_instruction()
     switch (status) {
        case EMULATION_DONE:
                if (inst == DEBUG)
                        return RESUME_HOST;
     }
 Or
2) kvmppc_account_exit(vcpu, DEBUG_EXITS); returns EMULATION_DONE;
And in emulation_exit()
     status = kvmppc_emulate_instruction()
     switch (status) {
        case EMULATION_DONE:
                if (inst == DEBUG) {
                        fill run-> 
                        return RESUME_HOST;
                }
     }

Or
3) kvmppc_account_exit(vcpu, DEBUG_EXITS); returns a new status type 
(EMULATION_DEBUG_INST)
And in emulation_exit()
     status = kvmppc_emulate_instruction()
     switch (status) {
        case EMULATION_DEBUG_INST:
                        fill run-> 
                        return RESUME_HOST;
     }

> 
> >>
> >> what I want is:
> >>
> >> int emulation_exit()
> >> {
> >>    switch (inst) {
> >>        case INST_A:
> >>            foo(); break;
> >>        case DEBUG_INST:
> >>            debug_stuff(); break;
> >>        ....
> >>    }
> >> }
> >>
> >>>
> >>>
> >>>>
> >>>>> +               run->exit_reason = KVM_EXIT_DEBUG;
> >>>>> +               run->debug.arch.pc = vcpu->arch.pc;
> >>>>> +               run->debug.arch.exception = exit_nr;
> >>>>> +               run->debug.arch.status = 0;
> >>>>> +               kvmppc_account_exit(vcpu, DEBUG_EXITS);
> >>>>> +               return RESUME_HOST;
> >>>>> +       }
> >>>>> +
> >>>>>         er = kvmppc_emulate_instruction(run, vcpu);
> >>>>>         switch (er) {
> >>>>>         case EMULATE_DONE:
> >>>>> @@ -697,6 +711,44 @@ static int emulation_exit(struct kvm_run
> >>>>> *run, struct
> >>>> kvm_vcpu *vcpu)
> >>>>>         default:
> >>>>>                 BUG();
> >>>>>         }
> >>>>> +
> >>>>> +       if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_ENABLE) &&
> >>>>> +           (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
> >>>>
> >>>> I don't understand how this is supposed to work. When we enable
> >>>> singlestep, why would we end up in emulation_exit()?
> >>>
> >>> When singlestep is enabled then we set DBCR0[ICMP] and the debug
> >>> handler
> >> should be able to handle this. I think you are right.
> >>>
> >>>>
> >>>>> +               run->exit_reason = KVM_EXIT_DEBUG;
> >>>>> +               return RESUME_HOST;
> >>>>> +       }
> >>>>> +}
> >>>>> +
> >>>>> +static int kvmppc_handle_debug(struct kvm_run *run, struct
> >>>>> +kvm_vcpu
> >>>>> +*vcpu) {
> >>>>> +       u32 dbsr;
> >>>>> +
> >>>>> +#ifndef CONFIG_KVM_BOOKE_HV
> >>>>> +       if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
> >>>>> +               vcpu->arch.pc = mfspr(SPRN_DSRR0);
> >>>>> +       else
> >>>>> +               vcpu->arch.pc = mfspr(SPRN_CSRR0); #endif
> >>>>
> >>>> Why doesn't this get handled in the asm code that recovers from the
> >>>> respective exceptions?
> >>>
> >>> Yes. I will remove this.
> >>>
> >>>>
> >>>>> +       dbsr = vcpu->arch.dbsr;
> >>>>> +
> >>>>> +       run->debug.arch.pc = vcpu->arch.pc;
> >>>>> +       run->debug.arch.status = 0;
> >>>>> +       vcpu->arch.dbsr = 0;
> >>>>> +
> >>>>> +       if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
> >>>>> +               run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
> >>>>> +       } else {
> >>>>> +               if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
> >>>>> +                       run->debug.arch.status |= 
> >>>>> KVMPPC_DEBUG_WATCH_WRITE;
> >>>>> +               else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
> >>>>> +                       run->debug.arch.status |= 
> >>>>> KVMPPC_DEBUG_WATCH_READ;
> >>>>> +               if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
> >>>>> +                       run->debug.arch.pc = 
> >>>>> vcpu->arch.shadow_dbg_reg.dac[0];
> >>>>> +               else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
> >>>>> +                       run->debug.arch.pc = 
> >>>>> vcpu->arch.shadow_dbg_reg.dac[1];
> >>>>> +       }
> >>>>
> >>>> Mind to explain the guest register sharing mechanism you are
> >>>> envisioning? Which registers are used by the guest? Which are used
> >>>> by the host? Who decides what gets used by whom?
> >>>
> >>> struct kvm_vcpu_arch {
> >>>   .
> >>>   .
> >>>
> >>>      /* guest debug registers*/
> >>>       struct kvmppc_booke_debug_reg dbg_reg;
> >>>      /* shadow debug registers */
> >>>      struct kvmppc_booke_debug_reg shadow_dbg_reg;
> >>>      /* host debug registers*/
> >>>      struct kvmppc_booke_debug_reg host_dbg_reg;
> >>>
> >>>   .
> >>>   .
> >>> }
> >>>
> >>> dbg_reg: contains the values written by guest.
> >>> shadow_dbg_reg: This contains the actual values to be written on
> >>> behalf of
> >> guest/qemu.
> >>> host_dbg_reg: the debug register value of host (needed for
> >>> store/retore
> >> purpose).
> >>>
> >>> According to current design both, the qemu debug stub and guest,
> >>> cannot share
> >> the debug resources. QEMU debug stub is given priority.
> >>> But host and guest can share all debug registers and host cannot
> >>> debug guest
> >> if it is using the debug resource (which probably does not look like
> >> a good case).
> >>
> >> Ok. Assume that in guest context, we only ever want guest debugging
> >> enabled. If QEMU wants to inject a QEMU breakpoint, it will fiddle
> >> with the guest debug registers itself to inject its breakpoint into them.
> >
> > I would like to understand the context you are describing.
> > QEMU would like to inject debug interrupt to guest only if a debug interrupt
> happened in guest context and QEMU is not able to handle interrupt (because 
> qemu
> debug have not set). Right?
> 
> Yes
> 
> > In that case the only thing that needed to be updated is guest->DBSR?
> 
> I think you lost me here :)

Really :) . May be I wrote something wrong. 

> 
> >
> >>
> >> With that scheme, would we still need shadow debug registers? Or
> >> could we remove that one level of indirection?
> >>
> >>>
> >>>>
> >>>>> +
> >>>>> +       return RESUME_HOST;
> >>>>> }
> >>>>>
> >>>>> static void kvmppc_fill_pt_regs(struct pt_regs *regs) @@ -843,7
> >>>>> +895,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
> >>>> kvm_vcpu *vcpu,
> >>>>>                 break;
> >>>>>
> >>>>>         case BOOKE_INTERRUPT_HV_PRIV:
> >>>>> -               r = emulation_exit(run, vcpu);
> >>>>> +               r = emulation_exit(run, vcpu, exit_nr);
> >>>>>                 break;
> >>>>>
> >>>>>         case BOOKE_INTERRUPT_PROGRAM:
> >>>>> @@ -862,7 +914,7 @@ int kvmppc_handle_exit(struct kvm_run *run,
> >>>>> struct
> >>>> kvm_vcpu *vcpu,
> >>>>>                         break;
> >>>>>                 }
> >>>>>
> >>>>> -               r = emulation_exit(run, vcpu);
> >>>>> +               r = emulation_exit(run, vcpu, exit_nr);
> >>>>>                 break;
> >>>>>
> >>>>>         case BOOKE_INTERRUPT_FP_UNAVAIL:
> >>>>> @@ -1052,18 +1104,12 @@ int kvmppc_handle_exit(struct kvm_run
> >>>>> *run, struct
> >>>> kvm_vcpu *vcpu,
> >>>>>         }
> >>>>>
> >>>>>         case BOOKE_INTERRUPT_DEBUG: {
> >>>>> -               u32 dbsr;
> >>>>> -
> >>>>> -               vcpu->arch.pc = mfspr(SPRN_CSRR0);
> >>>>> -
> >>>>> -               /* clear IAC events in DBSR register */
> >>>>> -               dbsr = mfspr(SPRN_DBSR);
> >>>>> -               dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
> >>>>> -               mtspr(SPRN_DBSR, dbsr);
> >>>>> -
> >>>>> -               run->exit_reason = KVM_EXIT_DEBUG;
> >>>>> +               r = kvmppc_handle_debug(run, vcpu);
> >>>>> +               if (r == RESUME_HOST) {
> >>>>> +                       run->debug.arch.exception = exit_nr;
> >>>>> +                       run->exit_reason = KVM_EXIT_DEBUG;
> >>>>> +               }
> >>>>>                 kvmppc_account_exit(vcpu, DEBUG_EXITS);
> >>>>> -               r = RESUME_HOST;
> >>>>>                 break;
> >>>>>         }
> >>>>>
> >>>>> @@ -1403,10 +1449,78 @@ int kvm_vcpu_ioctl_set_one_reg(struct
> >>>>> kvm_vcpu *vcpu,
> >>>> struct kvm_one_reg *reg)
> >>>>>         return r;
> >>>>> }
> >>>>>
> >>>>> +#define BP_NUM KVMPPC_BOOKE_IAC_NUM
> >>>>> +#define WP_NUM KVMPPC_BOOKE_DAC_NUM
> >>>>> +
> >>>>> int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> >>>>>                                          struct kvm_guest_debug *dbg) {
> >>>>> -       return -EINVAL;
> >>>>> +
> >>>>> +       if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
> >>>>> +               /* Clear All debug events */
> >>>>> +               vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> >>>>> +               vcpu->guest_debug = 0;
> >>>>> +               return 0;
> >>>>> +       }
> >>>>> +
> >>>>> +       vcpu->guest_debug = dbg->control;
> >>>>> +       vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> >>>>> +
> >>>>> +       if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> >>>>> +               vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
> >>>>> +
> >>>>> +       if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
> >>>>> +               struct kvmppc_booke_debug_reg *gdbgr =
> >>>>> +                               &(vcpu->arch.shadow_dbg_reg);
> >>>>> +               int n, b = 0, w = 0;
> >>>>> +               const u32 bp_code[] = {
> >>>>> +                       DBCR0_IAC1 | DBCR0_IDM,
> >>>>> +                       DBCR0_IAC2 | DBCR0_IDM,
> >>>>> +                       DBCR0_IAC3 | DBCR0_IDM,
> >>>>> +                       DBCR0_IAC4 | DBCR0_IDM
> >>>>> +               };
> >>>>> +               const u32 wp_code[] = {
> >>>>> +                       DBCR0_DAC1W | DBCR0_IDM,
> >>>>> +                       DBCR0_DAC2W | DBCR0_IDM,
> >>>>> +                       DBCR0_DAC1R | DBCR0_IDM,
> >>>>> +                       DBCR0_DAC2R | DBCR0_IDM
> >>>>> +               };
> >>>>> +
> >>>>> +#ifndef CONFIG_KVM_BOOKE_HV
> >>>>> +               gdbgr->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
> >>>>> +                               DBCR1_IAC3US | DBCR1_IAC4US;
> >>>>> +               gdbgr->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; #else
> >>>>> +               gdbgr->dbcr1 = 0;
> >>>>> +               gdbgr->dbcr2 = 0;
> >>>>> +#endif
> >>>>> +
> >>>>> +               for (n = 0; n < (BP_NUM + WP_NUM); n++) {
> >>>>> +                       u32 type = dbg->arch.bp[n].type;
> >>>>> +
> >>>>> +                       if (!type)
> >>>>> +                               break;
> >>>>> +
> >>>>> +                       if (type & (KVMPPC_DEBUG_WATCH_READ |
> >>>>> +                                   KVMPPC_DEBUG_WATCH_WRITE)) {
> >>>>> +                               if (w < WP_NUM) {
> >>>>> +                                       if (type & 
> >>>>> KVMPPC_DEBUG_WATCH_READ)
> >>>>> +                                               gdbgr->dbcr0 |= 
> >>>>> wp_code[w + 2];
> >>>>> +                                       if (type & 
> >>>>> KVMPPC_DEBUG_WATCH_WRITE)
> >>>>> +                                               gdbgr->dbcr0 |= 
> >>>>> wp_code[w];
> >>>>> +                                       gdbgr->dac[w] = 
> >>>>> dbg->arch.bp[n].addr;
> >>>>> +                                       w++;
> >>>>> +                               }
> >>>>> +                       } else if (type & KVMPPC_DEBUG_BREAKPOINT) {
> >>>>> +                               if (b < BP_NUM) {
> >>>>> +                                       gdbgr->dbcr0 |= bp_code[b];
> >>>>> +                                       gdbgr->iac[b] = 
> >>>>> dbg->arch.bp[n].addr;
> >>>>> +                                       b++;
> >>>>> +                               }
> >>>>> +                       }
> >>>>> +               }
> >>>>> +       }
> >>>>> +       return 0;
> >>>>> }
> >>>>>
> >>>>> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct
> >>>>> kvm_fpu *fpu) diff --git a/arch/powerpc/kvm/booke_interrupts.S
> >>>> b/arch/powerpc/kvm/booke_interrupts.S
> >>>>> index dd9c5d4..2202936 100644
> >>>>> --- a/arch/powerpc/kvm/booke_interrupts.S
> >>>>> +++ b/arch/powerpc/kvm/booke_interrupts.S
> >>>>> @@ -39,6 +39,8 @@
> >>>>> #define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + 4) #define
> >>>>> HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */
> >>>>> #define HOST_STACK_LR   (HOST_STACK_SIZE + 4) /* In caller stack frame. 
> >>>>> */
> >>>>> +#define DBCR0_AC_BITS  (DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 |
> DBCR0_IAC4 | \
> >>>>> +                        DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R |
> DBCR0_DAC2W)
> >>>>>
> >>>>> #define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \
> >>>>>                       (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ @@ -52,6
> >>>>> +54,8 @@
> >>>>>                      (1<<BOOKE_INTERRUPT_PROGRAM) | \
> >>>>>                      (1<<BOOKE_INTERRUPT_DTLB_MISS))
> >>>>>
> >>>>> +#define NEED_DEBUG_SAVE (1<<BOOKE_INTERRUPT_DEBUG)
> >>>>> +
> >>>>> .macro __KVM_HANDLER ivor_nr scratch srr0
> >>>>>         stw     r3, VCPU_GPR(R3)(r4)
> >>>>>         stw     r5, VCPU_GPR(R5)(r4)
> >>>>> @@ -212,6 +216,61 @@ _GLOBAL(kvmppc_resume_host)
> >>>>>         stw     r9, VCPU_FAULT_ESR(r4)
> >>>>> ..skip_esr:
> >>>>>
> >>>>> +       addi    r7, r4, VCPU_HOST_DBG
> >>>>> +       mfspr   r9, SPRN_DBCR0
> >>>>> +       lwz     r8, KVMPPC_DBG_DBCR0(r7)
> >>>>> +       andis.  r9, r9, DBCR0_AC_BITS@h
> >>>>> +       beq     skip_load_host_debug
> >>>>> +       li      r9, 0
> >>>>> +       mtspr   SPRN_DBCR0, r9          /* disable all debug event */
> >>>>> +       lwz     r9, KVMPPC_DBG_DBCR1(r7)
> >>>>> +       mtspr   SPRN_DBCR1, r9
> >>>>> +       lwz     r9, KVMPPC_DBG_DBCR2(r7)
> >>>>> +       mtspr   SPRN_DBCR2, r9
> >>>>> +       lwz     r9, KVMPPC_DBG_IAC1+4(r7)
> >>>>> +       mtspr   SPRN_IAC1, r9
> >>>>> +       lwz     r9, KVMPPC_DBG_IAC2+4(r7)
> >>>>> +       mtspr   SPRN_IAC2, r9
> >>>>> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
> >>>>> +       lwz     r9, KVMPPC_DBG_IAC3+4(r7)
> >>>>> +       mtspr   SPRN_IAC3, r9
> >>>>> +       lwz     r9, KVMPPC_DBG_IAC4+4(r7)
> >>>>> +       mtspr   SPRN_IAC4, r9
> >>>>> +#endif
> >>>>> +       lwz     r9, KVMPPC_DBG_DAC1+4(r7)
> >>>>> +       mtspr   SPRN_DAC1, r9
> >>>>> +       lwz     r9, KVMPPC_DBG_DAC2+4(r7)
> >>>>
> >>>> Yikes. Please move this into a struct, similar to how the MSR
> >>>> save/restore happens on x86.
> >>>>
> >>>> Also, these are 64-bit loads and stores, no?
> >>>
> >>> Ok, you mean PPC_LD/STD?
> >>
> >> Yeah :).
> >>
> >>>
> >>>>
> >>>>> +       mtspr   SPRN_DAC2, r9
> >>>>> +skip_load_host_debug:
> >>>>> +       /* Clear h/w DBSR and save current(guest) DBSR */
> >>>>> +       mfspr   r9, SPRN_DBSR
> >>>>> +       mtspr   SPRN_DBSR, r9
> >>>>> +       isync
> >>>>> +       andi.   r7, r6, NEED_DEBUG_SAVE
> >>>>> +       beq     skip_dbsr_save
> >>>>> +       /*
> >>>>> +        * If vcpu->guest_debug flag is set then do not check for
> >>>>> +        * shared->msr.DE as this debugging (say by QEMU) does not
> >>>>> +        * depends on shared->msr.de. In these scanerios MSR.DE is
> >>>>> +        * always set using shared_msr and should be handled always.
> >>>>> +        */
> >>>>> +       lwz     r7, VCPU_GUEST_DEBUG(r4)
> >>>>> +       cmpwi   r7, 0
> >>>>> +       bne     skip_save_trap_event
> >>>>> +       PPC_LL  r3, VCPU_SHARED(r4)
> >>>>> +#ifndef CONFIG_64BIT
> >>>>> +       lwz     r3, (VCPU_SHARED_MSR + 4)(r3)
> >>>>> +#else
> >>>>> +       ld      r3, (VCPU_SHARED_MSR)(r3)
> >>>>> +#endif
> >>>>
> >>>> Didn't we have 64-bit access helpers for these?
> >>>
> >>> I will use PPC_LD.
> >>>
> >>>>
> >>>> Also this shouldn't happen in the entry/exit code. We have
> >>>> shadow_msr for these purposes.
> >>>
> >>> Based on shared_msr.MSR_DE, we set make DBSR (set DBSR_TIE).
> >>
> >> Hrm. Yeah, must've misread something here.
> >>
> >> It would be awesome if we could make the debug register save/restore
> >> a bit more relaxed though. For example during preemption.
> >
> > If we delay this save/restore then host cannot do debugging of KVM code.
> Right?
> 
> You mean using kdb? Does anyone actually do that?
> 
> But let's leave this for later as an optimization.

Do you mean that debug registers should be saved/restored in preemption? any 
specific reason? I think the current save/restore does not add much overhead 
and we can better it now only, is not it? 

Thanks
-Bharat
> 
> 
> Alex
> 


--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to