Re: [PATCH 2/2] KVM: PPC: booke: Add watchdog emulation

2012-07-06 Thread Alexander Graf

On 28.06.2012, at 08:17, Bharat Bhushan wrote:

 This patch adds the watchdog emulation in KVM. The watchdog
 emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_WDT) ioctl.
 The kernel timer are used for watchdog emulation and emulates
 h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU
 if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how
 it is configured.
 
 Signed-off-by: Liu Yu yu@freescale.com
 Signed-off-by: Scott Wood scottw...@freescale.com
 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 ---
 arch/powerpc/include/asm/kvm_host.h  |2 +
 arch/powerpc/include/asm/kvm_ppc.h   |3 +
 arch/powerpc/include/asm/reg_booke.h |2 +
 arch/powerpc/kvm/44x.c   |1 +
 arch/powerpc/kvm/booke.c |  125 ++
 arch/powerpc/kvm/booke_emulate.c |6 ++-
 arch/powerpc/kvm/powerpc.c   |   25 ++-
 include/linux/kvm.h  |2 +
 8 files changed, 162 insertions(+), 4 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/kvm_host.h 
 b/arch/powerpc/include/asm/kvm_host.h
 index 50ea12f..a9e5aed 100644
 --- a/arch/powerpc/include/asm/kvm_host.h
 +++ b/arch/powerpc/include/asm/kvm_host.h
 @@ -467,6 +467,7 @@ struct kvm_vcpu_arch {
   ulong fault_esr;
   ulong queued_dear;
   ulong queued_esr;
 + struct timer_list wdt_timer;
   u32 tlbcfg[4];
   u32 mmucfg;
   u32 epr;
 @@ -482,6 +483,7 @@ struct kvm_vcpu_arch {
   u8 osi_needed;
   u8 osi_enabled;
   u8 papr_enabled;
 + u8 watchdog_enable;
   u8 sane;
   u8 cpu_type;
   u8 hcall_needed;
 diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
 b/arch/powerpc/include/asm/kvm_ppc.h
 index 0124937..e5cf4b9 100644
 --- a/arch/powerpc/include/asm/kvm_ppc.h
 +++ b/arch/powerpc/include/asm/kvm_ppc.h
 @@ -67,6 +67,7 @@ extern int kvmppc_emulate_mmio(struct kvm_run *run, struct 
 kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
 extern void kvmppc_decrementer_func(unsigned long data);
 +extern void kvmppc_watchdog_func(unsigned long data);
 extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
 
 /* Core-specific hooks */
 @@ -104,6 +105,8 @@ extern void kvmppc_core_queue_external(struct kvm_vcpu 
 *vcpu,
struct kvm_interrupt *irq);
 extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
  struct kvm_interrupt *irq);
 +extern void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu);
 +extern void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
   unsigned int op, int *advance);
 diff --git a/arch/powerpc/include/asm/reg_booke.h 
 b/arch/powerpc/include/asm/reg_booke.h
 index 2d916c4..7efb3f5 100644
 --- a/arch/powerpc/include/asm/reg_booke.h
 +++ b/arch/powerpc/include/asm/reg_booke.h
 @@ -538,6 +538,8 @@
 #define FP_2_21   3   /* 2^21 clocks */
 #define TCR_FIE   0x0080  /* FIT Interrupt Enable */
 #define TCR_ARE   0x0040  /* Auto Reload Enable */
 +#define TCR_GET_FSL_WP(tcr)  tcr)  0xC000)  30) | \
 +   (((tcr)  0x1E)  15))
 
 /* Bit definitions for the TSR. */
 #define TSR_ENW   0x8000  /* Enable Next Watchdog */
 diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
 index 50e7dbc..a213aba 100644
 --- a/arch/powerpc/kvm/44x.c
 +++ b/arch/powerpc/kvm/44x.c
 @@ -28,6 +28,7 @@
 #include asm/kvm_44x.h
 #include asm/kvm_ppc.h
 
 +#include booke.h
 #include 44x_tlb.h
 #include booke.h
 
 diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
 index d25a097..ad30f75 100644
 --- a/arch/powerpc/kvm/booke.c
 +++ b/arch/powerpc/kvm/booke.c
 @@ -206,6 +206,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
   clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, vcpu-arch.pending_exceptions);
 }
 
 +void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
 +{
 + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
 +}
 +
 +void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
 +{
 + clear_bit(BOOKE_IRQPRIO_WATCHDOG, vcpu-arch.pending_exceptions);
 +}
 +
 static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
 {
 #ifdef CONFIG_KVM_BOOKE_HV
 @@ -325,6 +335,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu 
 *vcpu,
   msr_mask = MSR_CE | MSR_ME | MSR_DE;
   int_class = INT_CLASS_NONCRIT;
   break;
 + case BOOKE_IRQPRIO_WATCHDOG:
   case BOOKE_IRQPRIO_CRITICAL:
   case BOOKE_IRQPRIO_DBELL_CRIT:
   allowed = vcpu-arch.shared-msr  MSR_CE;
 @@ -404,12 +415,98 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu 
 *vcpu,
   return allowed;
 }
 
 +/*
 + * The 

Re: [PATCH 1/2] KVM: PPC: Critical interrupt emulation support

2012-07-06 Thread Alexander Graf

On 28.06.2012, at 07:37, Bharat Bhushan wrote:

 rfci instruction and CSRR0/1 registers are emulated.
 
 Signed-off-by: Scott Wood scottw...@freescale.com
 Signed-off-by: Stuart Yoder stuart.yo...@freescale.com
 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com

Thanks, applied to kvm-ppc-next.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 08/17] KVM: PPC: e500mc: Fix tlbilx emulation for 64-bit guests

2012-07-06 Thread Alexander Graf

On 25.06.2012, at 14:26, Mihai Caraman wrote:

 tlbilxva emulation was using an u32 variable for guest effective address.
 Replace it with gva_t type to handle 64-bit guests.
 
 Signed-off-by: Mihai Caraman mihai.cara...@freescale.com

Thanks, applied to kvm-ppc-next.


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] msi/msix: added API to set MSI message address and data

2012-07-06 Thread Alexander Graf

On 02.07.2012, at 09:24, Jan Kiszka wrote:

 On 2012-07-02 06:28, Alexey Kardashevskiy wrote:
 Ping?
 
 
 On 22/06/12 11:15, Alexey Kardashevskiy wrote:
 Added (msi|msix)_set_message() function for whoever might
 want to use them.
 
 Currently msi_notify()/msix_notify() write to these vectors to
 signal the guest about an interrupt so the correct values have to
 written there by the guest or QEMU.
 
 For example, POWER guest never initializes MSI/MSIX vectors, instead
 it uses RTAS hypercalls. So in order to support MSIX for virtio-pci on
 POWER we have to initialize MSI/MSIX message from QEMU.
 
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
 hw/msi.c  |   13 +
 hw/msi.h  |1 +
 hw/msix.c |9 +
 hw/msix.h |2 ++
 4 files changed, 25 insertions(+)
 
 diff --git a/hw/msi.c b/hw/msi.c
 index 5233204..cc6102f 100644
 --- a/hw/msi.c
 +++ b/hw/msi.c
 @@ -105,6 +105,19 @@ static inline uint8_t msi_pending_off(const PCIDevice* 
 dev, bool msi64bit)
 return dev-msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : 
 PCI_MSI_PENDING_32);
 }
 
 +void msi_set_message(PCIDevice *dev, MSIMessage msg)
 +{
 +uint16_t flags = pci_get_word(dev-config + msi_flags_off(dev));
 +bool msi64bit = flags  PCI_MSI_FLAGS_64BIT;
 +
 +if (msi64bit) {
 +pci_set_quad(dev-config + msi_address_lo_off(dev), msg.address);
 +} else {
 +pci_set_long(dev-config + msi_address_lo_off(dev), msg.address);
 +}
 +pci_set_word(dev-config + msi_data_off(dev, msi64bit), msg.data);
 +}
 +
 bool msi_enabled(const PCIDevice *dev)
 {
 return msi_present(dev) 
 diff --git a/hw/msi.h b/hw/msi.h
 index 75747ab..6ec1f99 100644
 --- a/hw/msi.h
 +++ b/hw/msi.h
 @@ -31,6 +31,7 @@ struct MSIMessage {
 
 extern bool msi_supported;
 
 +void msi_set_message(PCIDevice *dev, MSIMessage msg);
 bool msi_enabled(const PCIDevice *dev);
 int msi_init(struct PCIDevice *dev, uint8_t offset,
  unsigned int nr_vectors, bool msi64bit, bool 
 msi_per_vector_mask);
 diff --git a/hw/msix.c b/hw/msix.c
 index ded3c55..5f7d6d3 100644
 --- a/hw/msix.c
 +++ b/hw/msix.c
 @@ -45,6 +45,15 @@ static MSIMessage msix_get_message(PCIDevice *dev, 
 unsigned vector)
 return msg;
 }
 
 +void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg)
 +{
 +uint8_t *table_entry = dev-msix_table_page + vector * 
 PCI_MSIX_ENTRY_SIZE;
 +
 +pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address);
 +pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data);
 +table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] = 
 ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
 +}
 +
 /* Add MSI-X capability to the config space for the device. */
 /* Given a bar and its size, add MSI-X table on top of it
  * and fill MSI-X capability in the config space.
 diff --git a/hw/msix.h b/hw/msix.h
 index 50aee82..26a437e 100644
 --- a/hw/msix.h
 +++ b/hw/msix.h
 @@ -4,6 +4,8 @@
 #include qemu-common.h
 #include pci.h
 
 +void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg);
 +
 int msix_init(PCIDevice *pdev, unsigned short nentries,
   MemoryRegion *bar,
   unsigned bar_nr, unsigned bar_size);
 
 
 
 Acked-by: Jan Kiszka jan.kis...@siemens.com

So I take it that you expect this patch to go through my tree, not yours?


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] msi/msix: added API to set MSI message address and data

2012-07-06 Thread Jan Kiszka
On 2012-07-06 17:36, Alexander Graf wrote:
 
 On 02.07.2012, at 09:24, Jan Kiszka wrote:
 
 On 2012-07-02 06:28, Alexey Kardashevskiy wrote:
 Ping?


 On 22/06/12 11:15, Alexey Kardashevskiy wrote:
 Added (msi|msix)_set_message() function for whoever might
 want to use them.

 Currently msi_notify()/msix_notify() write to these vectors to
 signal the guest about an interrupt so the correct values have to
 written there by the guest or QEMU.

 For example, POWER guest never initializes MSI/MSIX vectors, instead
 it uses RTAS hypercalls. So in order to support MSIX for virtio-pci on
 POWER we have to initialize MSI/MSIX message from QEMU.

 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
 hw/msi.c  |   13 +
 hw/msi.h  |1 +
 hw/msix.c |9 +
 hw/msix.h |2 ++
 4 files changed, 25 insertions(+)

 diff --git a/hw/msi.c b/hw/msi.c
 index 5233204..cc6102f 100644
 --- a/hw/msi.c
 +++ b/hw/msi.c
 @@ -105,6 +105,19 @@ static inline uint8_t msi_pending_off(const 
 PCIDevice* dev, bool msi64bit)
 return dev-msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : 
 PCI_MSI_PENDING_32);
 }

 +void msi_set_message(PCIDevice *dev, MSIMessage msg)
 +{
 +uint16_t flags = pci_get_word(dev-config + msi_flags_off(dev));
 +bool msi64bit = flags  PCI_MSI_FLAGS_64BIT;
 +
 +if (msi64bit) {
 +pci_set_quad(dev-config + msi_address_lo_off(dev), msg.address);
 +} else {
 +pci_set_long(dev-config + msi_address_lo_off(dev), msg.address);
 +}
 +pci_set_word(dev-config + msi_data_off(dev, msi64bit), msg.data);
 +}
 +
 bool msi_enabled(const PCIDevice *dev)
 {
 return msi_present(dev) 
 diff --git a/hw/msi.h b/hw/msi.h
 index 75747ab..6ec1f99 100644
 --- a/hw/msi.h
 +++ b/hw/msi.h
 @@ -31,6 +31,7 @@ struct MSIMessage {

 extern bool msi_supported;

 +void msi_set_message(PCIDevice *dev, MSIMessage msg);
 bool msi_enabled(const PCIDevice *dev);
 int msi_init(struct PCIDevice *dev, uint8_t offset,
  unsigned int nr_vectors, bool msi64bit, bool 
 msi_per_vector_mask);
 diff --git a/hw/msix.c b/hw/msix.c
 index ded3c55..5f7d6d3 100644
 --- a/hw/msix.c
 +++ b/hw/msix.c
 @@ -45,6 +45,15 @@ static MSIMessage msix_get_message(PCIDevice *dev, 
 unsigned vector)
 return msg;
 }

 +void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg)
 +{
 +uint8_t *table_entry = dev-msix_table_page + vector * 
 PCI_MSIX_ENTRY_SIZE;
 +
 +pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address);
 +pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data);
 +table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] = 
 ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
 +}
 +
 /* Add MSI-X capability to the config space for the device. */
 /* Given a bar and its size, add MSI-X table on top of it
  * and fill MSI-X capability in the config space.
 diff --git a/hw/msix.h b/hw/msix.h
 index 50aee82..26a437e 100644
 --- a/hw/msix.h
 +++ b/hw/msix.h
 @@ -4,6 +4,8 @@
 #include qemu-common.h
 #include pci.h

 +void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg);
 +
 int msix_init(PCIDevice *pdev, unsigned short nentries,
   MemoryRegion *bar,
   unsigned bar_nr, unsigned bar_size);



 Acked-by: Jan Kiszka jan.kis...@siemens.com
 
 So I take it that you expect this patch to go through my tree, not yours?

I suppose you wanted to address Michael, right? He has to decide.

Jan

-- 
Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
Corporate Competence Center Embedded Linux


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH] KVM: PPC: bookehv: Add ESR flag to Data Storage Interrupt

2012-07-06 Thread Caraman Mihai Claudiu-B02008
 From: Kun Wang [mailto:wang...@cn.ibm.com] 
 Sent: Friday, July 06, 2012 5:07 PM
 To: Caraman Mihai Claudiu-B02008
 Cc: ag...@suse.de; k...@vger.kernel.org; kvm-ppc@vger.kernel.org; 
 kvm-ppc-ow...@vger.kernel.org
 Subject: Re: [PATCH] KVM: PPC: bookehv: Add ESR flag to Data Storage Interrupt

 Mike,

 I am going to try your patch on our A2 machine. One quick (and naive) 
 question,
 which git tree am I supposed to apply it to? 
 git://git.kernel.org/pub/scm/virt/kvm/kvm.git?

 Best regards,
 Kun Wang

I think you are looking for this patchset, you will find all details there:
   [RFC PATCH 00/17] KVM: PPC: 64-bit Book3E support

Regards,
Mike

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [Qemu-ppc] [RFC PATCH 12/17] PowerPC: booke64: Add DO_KVM kernel hooks

2012-07-06 Thread Caraman Mihai Claudiu-B02008
 -Original Message-
 From: Benjamin Herrenschmidt [mailto:b...@kernel.crashing.org]
 Sent: Thursday, July 05, 2012 1:26 AM
 To: Alexander Graf
 Cc: Caraman Mihai Claudiu-B02008; kvm-ppc@vger.kernel.org; KVM list;
 linuxppc-dev; qemu-...@nongnu.org List
 Subject: Re: [Qemu-ppc] [RFC PATCH 12/17] PowerPC: booke64: Add DO_KVM
 kernel hooks
 
 On Wed, 2012-07-04 at 16:29 +0200, Alexander Graf wrote:
 
   +#ifdef CONFIG_KVM_BOOKE_HV
   +#define KVM_BOOKE_HV_MFSPR(reg, spr) \
   + BEGIN_FTR_SECTION   \
   + mfspr   reg, spr;   \
   + END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
   +#else
   +#define KVM_BOOKE_HV_MFSPR(reg, spr)
   +#endif
 
  Bleks - this is ugly. Do we really need to open-code the #ifdef here?
  Can't the feature section code determine that the feature is disabled
  and just always not include the code?
 
 You can't but in any case I don't see the point of the conditional here,
 we'll eventually have to load srr1 no ? We can move the load up to here
 in all cases or can't we ? 

I like the idea, but there is a problem with addition macros which may clobber
r11 and PROLOG_ADDITION_MASKABLE_GEN is such a case.

 If really not, we could have it inside DO_KVM and be done with it no ?

32-bit exception prolog loads srr1 unconditionally, as Alex and Scott mentioned
earlier, so we will be suboptimal for this case.

-Mike
N�r��yb�X��ǧv�^�)޺{.n�+jir)w*jg����ݢj/���z�ޖ��2�ޙ�)ߡ�a�����G���h��j:+v���w��٥

Re: [Qemu-ppc] [RFC PATCH 12/17] PowerPC: booke64: Add DO_KVM kernel hooks

2012-07-06 Thread Alexander Graf

On 07.07.2012, at 00:33, Caraman Mihai Claudiu-B02008 wrote:

 -Original Message-
 From: Benjamin Herrenschmidt [mailto:b...@kernel.crashing.org]
 Sent: Thursday, July 05, 2012 1:26 AM
 To: Alexander Graf
 Cc: Caraman Mihai Claudiu-B02008; kvm-ppc@vger.kernel.org; KVM list;
 linuxppc-dev; qemu-...@nongnu.org List
 Subject: Re: [Qemu-ppc] [RFC PATCH 12/17] PowerPC: booke64: Add DO_KVM
 kernel hooks
 
 On Wed, 2012-07-04 at 16:29 +0200, Alexander Graf wrote:
 
 +#ifdef CONFIG_KVM_BOOKE_HV
 +#define KVM_BOOKE_HV_MFSPR(reg, spr)  \
 +  BEGIN_FTR_SECTION   \
 +  mfspr   reg, spr;   \
 +  END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
 +#else
 +#define KVM_BOOKE_HV_MFSPR(reg, spr)
 +#endif
 
 Bleks - this is ugly. Do we really need to open-code the #ifdef here?
 Can't the feature section code determine that the feature is disabled
 and just always not include the code?
 
 You can't but in any case I don't see the point of the conditional here,
 we'll eventually have to load srr1 no ? We can move the load up to here
 in all cases or can't we ? 
 
 I like the idea, but there is a problem with addition macros which may clobber
 r11 and PROLOG_ADDITION_MASKABLE_GEN is such a case.

Mike -v please :)


Alex

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] KVM: PPC: booke: Add watchdog emulation

2012-07-06 Thread Scott Wood
On 07/06/2012 08:17 AM, Alexander Graf wrote:
 On 28.06.2012, at 08:17, Bharat Bhushan wrote:
 +/*
 + * The timer system can almost deal with LONG_MAX timeouts, except that
 + * when you get very close to LONG_MAX, the slack added can cause overflow.
 + *
 + * LONG_MAX/2 is a conservative threshold, but it should be adequate for
 + * any realistic use.
 + */
 +#define MAX_TIMEOUT (LONG_MAX/2)
 
 Should this really be in kvm code?

It looks like we can use NEXT_TIMER_MAX_DELTA for this.

 +mask = 1ULL  (63 - period);
 +tb = get_tb();
 +if (tb  mask)
 +nr_jiffies += mask;
 
 To be honest, you lost me here. nr_jiffies is jiffies, right? While
 mask is basically in timebase granularity. I suppose you're just
 reusing the variable here for no good reason - the compiler will
 gladly optimize things for you if you write things a bit more verbose
 :).

Probably due to the way do_div() works, but yeah, it's confusing.  Maybe
something generic like ticks, interval, remaining, etc. would be
better, with a comment on the do_div saying it's converting timebase
ticks into jiffies.

 +static void arm_next_watchdog(struct kvm_vcpu *vcpu)
 +{
 +unsigned long nr_jiffies;
 +
 +nr_jiffies = watchdog_next_timeout(vcpu);
 +if (nr_jiffies  MAX_TIMEOUT)
 +mod_timer(vcpu-arch.wdt_timer, jiffies + nr_jiffies);
 +else
 +del_timer(vcpu-arch.wdt_timer);
 
 Can you del a timer that's not armed? Could that ever happen in this case?

del_timer() deactivates a timer - this works on both active and
inactive timers.

 Also, could the timer possibly be running somewhere, so do we need 
 del_timer_sync? Or don't we need to care?

This can be called in the context of the timer, so del_timer_sync()
would hang.

As for what would happen if a caller from a different context were to
race with a timer, I think you could end up with the timer armed based
on an old TCR.  del_timer_sync() won't help though, unless you replace
mod_timer() with del_timer_sync() plus add_timer() (with a check to see
whether it's running in timer context).  A better solution is probably
to use a spinlock in arm_next_watchdog().

 +void kvmppc_watchdog_func(unsigned long data)
 +{
 +struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
 +u32 tsr, new_tsr;
 +int final;
 +
 +do {
 +new_tsr = tsr = vcpu-arch.tsr;
 +final = 0;
 +
 +/* Time out event */
 +if (tsr  TSR_ENW) {
 +if (tsr  TSR_WIS) {
 +new_tsr = (tsr  ~TCR_WRC_MASK) |
 +  (vcpu-arch.tcr  TCR_WRC_MASK);
 +vcpu-arch.tcr = ~TCR_WRC_MASK;
 +final = 1;
 +} else {
 +new_tsr = tsr | TSR_WIS;
 +}
 +} else {
 +new_tsr = tsr | TSR_ENW;
 +}
 +} while (cmpxchg(vcpu-arch.tsr, tsr, new_tsr) != tsr);
 +
 +if (new_tsr  (TSR_WIS | TCR_WRC_MASK)) {
 +smp_wmb();
 +kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
 +kvm_vcpu_kick(vcpu);
 +}
 +
 +/*
 + * Avoid getting a storm of timers if the guest sets
 + * the period very short.  We'll restart it if anything
 + * changes.
 + */
 +if (!final)
 +arm_next_watchdog(vcpu);
 
 Mind to explain this part a bit further?

The whole function, or some subset near the end?

The if (!final) check means that we stop running the timer after final
expiration, to prevent the host from being flooded with timers if the
guest sets a short period but does not have TCR set to exit to QEMU.
Timers will resume the next time TSR/TCR is updated.

 @@ -1106,7 +1213,14 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
  }

  if (sregs-u.e.update_special  KVM_SREGS_E_UPDATE_TSR) {
 +u32 old_tsr = vcpu-arch.tsr;
 +
  vcpu-arch.tsr = sregs-u.e.tsr;
 +
 +if ((old_tsr ^ vcpu-arch.tsr) 
 +(TSR_ENW | TSR_WIS | TCR_WRC_MASK))
 +arm_next_watchdog(vcpu);
 
 Why isn't this one guarded by vcpu-arch.watchdog_enable?

I'm not sure that any of them should be -- there's no reason for the
watchdog interrupt mechanism to be dependent on QEMU, only the
heavyweight exit on final expiration.

 +spr_val = ~TCR_WRC_MASK;
 +kvmppc_set_tcr(vcpu,
 +   spr_val | (TCR_WRC_MASK  vcpu-arch.tcr));
 
 In fact, what you're trying to do here is keep TCR_WRC always on when it was 
 enabled once. So all you need is the OR here. No need for the mask above.

WRC is a 2-bit field that is supposed to preserve its value once written
to be non-zero.  Not that we actually do anything different based on the
specific non-zero value, but still we should implement the architected
semantics.

-Scott

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the