[RFC v5 49/57] arm64: kvm: Annotate non-standard stack frame functions

2020-01-09 Thread Julien Thierry
From: Raphael Gault 

Neither __guest_enter nor __guest_exit sets up a correct stack frame.
Since they can be considered callable functions, even though they are
special cases, we chose to silence the warnings reported by objtool by
annotating them as non-standard.

Signed-off-by: Raphael Gault 
Signed-off-by: Julien Thierry 
Cc: Marc Zyngier 
Cc: James Morse 
Cc: Suzuki K Poulose 
Cc: kvmarm@lists.cs.columbia.edu
---
 arch/arm64/kvm/hyp/entry.S | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index e5cc8d66bf53..c3443bfd0944 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 

 #define CPU_GP_REG_OFFSET(x)   (CPU_GP_REGS + x)
 #define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
@@ -97,6 +98,7 @@ alternative_else_nop_endif
eret
sb
 ENDPROC(__guest_enter)
+asm_stack_frame_non_standard __guest_enter

 ENTRY(__guest_exit)
// x0: return code
@@ -193,3 +195,4 @@ abort_guest_exit_end:
orr x0, x0, x5
 1: ret
 ENDPROC(__guest_exit)
+asm_stack_frame_non_standard __guest_exit
--
2.21.0
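For context: objtool's existing C-side annotation works by recording the
function's address in a discard section that objtool consumes at build time,
and the asm_stack_frame_non_standard macro used above presumably does the same
thing from assembly. Roughly, the C macro of that era (include/linux/frame.h)
looked like this; it is shown purely to illustrate the mechanism and is not
part of this patch:

	/* Mark a function as having a non-standard stack frame so that
	 * objtool skips stack validation for it. The function address ends
	 * up in a .discard section that is read and then dropped at link time. */
	#ifdef CONFIG_STACK_VALIDATION
	#define STACK_FRAME_NON_STANDARD(func) \
		static void *__func_stack_frame_non_standard_##func __used \
		__attribute__((__section__(".discard.func_stack_frame_non_standard"))) = func
	#else
	#define STACK_FRAME_NON_STANDARD(func)
	#endif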



[RFC v5 52/57] arm64: kernel: Annotate non-standard stack frame functions

2020-01-09 Thread Julien Thierry
From: Raphael Gault 

Annotate assembler functions which are callable but do not
set up a correct stack frame.

Signed-off-by: Raphael Gault 
Signed-off-by: Julien Thierry 
Cc: Marc Zyngier 
Cc: James Morse 
Cc: Suzuki K Poulose 
Cc: kvmarm@lists.cs.columbia.edu
---
 arch/arm64/kernel/hyp-stub.S | 3 +++
 arch/arm64/kvm/hyp-init.S| 3 +++
 2 files changed, 6 insertions(+)

diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 73d46070b315..8917d42f38c7 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -6,6 +6,7 @@
  * Author: Marc Zyngier 
  */

+#include 
 #include 
 #include 
 #include 
@@ -42,6 +43,7 @@ ENTRY(__hyp_stub_vectors)
ventry  el1_fiq_invalid // FIQ 32-bit EL1
ventry  el1_error_invalid   // Error 32-bit EL1
 ENDPROC(__hyp_stub_vectors)
+asm_stack_frame_non_standard __hyp_stub_vectors

.align 11

@@ -69,6 +71,7 @@ el1_sync:
 9: mov x0, xzr
eret
 ENDPROC(el1_sync)
+asm_stack_frame_non_standard el1_sync

 .macro invalid_vector  label
 \label:
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 160be2b4696d..63deea39313d 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 

.text
.pushsection.hyp.idmap.text, "ax"
@@ -118,6 +119,7 @@ CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
/* Hello, World! */
eret
 ENDPROC(__kvm_hyp_init)
+asm_stack_frame_non_standard __kvm_hyp_init

 ENTRY(__kvm_handle_stub_hvc)
cmp x0, #HVC_SOFT_RESTART
@@ -159,6 +161,7 @@ reset:
eret

 ENDPROC(__kvm_handle_stub_hvc)
+asm_stack_frame_non_standard __kvm_handle_stub_hvc

.ltorg

--
2.21.0



Re: [PATCH v2] KVM: arm/arm64: Introduce kvm_pmu_vcpu_init() to setup PMU counter idx

2019-07-23 Thread Julien Thierry
Hi Zenghui,

On 18/07/2019 09:15, Zenghui Yu wrote:
> We use "pmc->idx" and the "chained" bitmap to determine if the pmc is
> chained, in kvm_pmu_pmc_is_chained().  But idx might be uninitialized
> (and random) when we are making this decision, through a KVM_ARM_VCPU_INIT
> ioctl -> kvm_pmu_vcpu_reset(). And the test_bit() against this random
> idx will potentially hit a KASAN BUG [1].
> 
> In general, idx is a static property of a PMU counter that is not
> expected to be modified across resets, as suggested by Julien.  It
> looks more reasonable if we can set up the PMU counter idx for a vcpu
> at its creation time. Introduce a new function - kvm_pmu_vcpu_init()
> for this basic setup. Oh, and the KASAN BUG will get fixed this way.
> 
> [1] https://www.spinics.net/lists/kvm-arm/msg36700.html
> 
> Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters")
> Suggested-by: Andrew Murray 
> Suggested-by: Julien Thierry 
> Cc: Marc Zyngier 
> Signed-off-by: Zenghui Yu 
> ---
> 
> Changes since v1:
>  - Introduce kvm_pmu_vcpu_init() in vcpu's creation time, move the
>assignment of pmc->idx into it.
>  - Thus change the subject. The old one is "KVM: arm/arm64: Assign
>pmc->idx before kvm_pmu_stop_counter()".
> 
> Julien, I haven't collected your Acked-by into this version. If you're
> still happy with the change, please Ack again. Thanks!
> 

Thanks for making the change. This looks good to me:

Acked-by: Julien Thierry 

Thanks,

Julien

>  include/kvm/arm_pmu.h |  2 ++
>  virt/kvm/arm/arm.c|  2 ++
>  virt/kvm/arm/pmu.c| 18 +++---
>  3 files changed, 19 insertions(+), 3 deletions(-)
> 
> diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
> index 16c769a..6db0304 100644
> --- a/include/kvm/arm_pmu.h
> +++ b/include/kvm/arm_pmu.h
> @@ -34,6 +34,7 @@ struct kvm_pmu {
>  u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx);
>  void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 
> val);
>  u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu);
> +void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu);
>  void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu);
>  void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu);
>  void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val);
> @@ -71,6 +72,7 @@ static inline u64 kvm_pmu_valid_counter_mask(struct 
> kvm_vcpu *vcpu)
>  {
>   return 0;
>  }
> +static inline void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 
> val) {}
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index f645c0f..c704fa6 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -340,6 +340,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>   /* Set up the timer */
>   kvm_timer_vcpu_init(vcpu);
>  
> + kvm_pmu_vcpu_init(vcpu);
> +
>   kvm_arm_reset_debug_ptr(vcpu);
>  
>   return kvm_vgic_vcpu_init(vcpu);
> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> index 3dd8238..362a018 100644
> --- a/virt/kvm/arm/pmu.c
> +++ b/virt/kvm/arm/pmu.c
> @@ -215,6 +215,20 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, 
> struct kvm_pmc *pmc)
>  }
>  
>  /**
> + * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
> + * @vcpu: The vcpu pointer
> + *
> + */
> +void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
> +{
> + int i;
> + struct kvm_pmu *pmu = &vcpu->arch.pmu;
> +
> + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
> + pmu->pmc[i].idx = i;
> +}
> +
> +/**
>   * kvm_pmu_vcpu_reset - reset pmu state for cpu
>   * @vcpu: The vcpu pointer
>   *
> @@ -224,10 +238,8 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
>   int i;
>   struct kvm_pmu *pmu = &vcpu->arch.pmu;
>  
> - for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
> + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
>   kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
> - pmu->pmc[i].idx = i;
> - }
>  
>   bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
>  }
> 

-- 
Julien Thierry
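For reference, the check that ends up consuming the uninitialized idx is
roughly the following (paraphrased from the chained PMU counters support in
virt/kvm/arm/pmu.c, not a verbatim copy): with a random pmc->idx the bit index
handed to test_bit() is effectively arbitrary, which is what KASAN catches.

	static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
	{
		struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

		return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
	}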


Re: [PATCH] KVM: arm/arm64: Assign pmc->idx before kvm_pmu_stop_counter()

2019-07-17 Thread Julien Thierry
Hi Zenghui,

On 17/07/2019 13:20, Zenghui Yu wrote:
> We use "pmc->idx" and the "chained" bitmap to determine if the pmc is
> chained, in kvm_pmu_pmc_is_chained().  But idx might be uninitialized
> (and random) when we are making this decision, through a KVM_ARM_VCPU_INIT
> ioctl -> kvm_pmu_vcpu_reset(). And the test_bit() against this random
> idx will potentially hit a KASAN BUG [1].
> 
> Fix it by moving the assignment of idx before kvm_pmu_stop_counter().
> 
> [1] https://www.spinics.net/lists/kvm-arm/msg36700.html
> 
> Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters")
> Suggested-by: Andrew Murray 
> Cc: Marc Zyngier 
> Signed-off-by: Zenghui Yu 
> ---
>  virt/kvm/arm/pmu.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> index 3dd8238..521bfdd 100644
> --- a/virt/kvm/arm/pmu.c
> +++ b/virt/kvm/arm/pmu.c
> @@ -225,8 +225,8 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
>   struct kvm_pmu *pmu = &vcpu->arch.pmu;
>  
>   for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
> - kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
>   pmu->pmc[i].idx = i;

Yes, this is kind of a static property that should really be part of a
"kvm_pmu_vcpu_init()" or "kvm_pmu_vcpu_create()" and is not expected to
be modified across resets...

There is no such function at the moment and I'm unsure whether this
warrants creating a separate function (I would still suggest creating
it to make things clearer).

> + kvm_pmu_stop_counter(vcpu, >pmc[i]);

Whatever other opinions are on splitting pmu_vcpu_init/reset, that
change makes sense and fixes the issue:

Acked-by: Julien Thierry 

>   }
>  
>   bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
> 

Cheers,

-- 
Julien Thierry


[PATCH] MAINTAINERS: Update my email address

2019-07-17 Thread Julien Thierry
My @arm.com address will stop working in a couple of weeks. Update
MAINTAINERS and .mailmap files with an address I'll have access to.

Signed-off-by: Julien Thierry 
---
 .mailmap| 1 +
 MAINTAINERS | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.mailmap b/.mailmap
index 0fef932..468bced8 100644
--- a/.mailmap
+++ b/.mailmap
@@ -116,6 +116,7 @@ John Stultz 
 Juha Yrjola 
 Juha Yrjola 
 Juha Yrjola 
+Julien Thierry  
 Kay Sievers 
 Kenneth W Chen 
 Konstantin Khlebnikov  
diff --git a/MAINTAINERS b/MAINTAINERS
index 91d8700..9525601 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8808,7 +8808,7 @@ F:arch/x86/kvm/svm.c
 KERNEL VIRTUAL MACHINE FOR ARM/ARM64 (KVM/arm, KVM/arm64)
 M: Marc Zyngier 
 R: James Morse 
-R: Julien Thierry 
+R: Julien Thierry 
 R: Suzuki K Pouloze 
 L: linux-arm-ker...@lists.infradead.org (moderated for non-subscribers)
 L: kvmarm@lists.cs.columbia.edu
-- 
1.9.1



[PATCH v4 7/9] arm/arm64: kvm: pmu: Make overflow handler NMI safe

2019-07-17 Thread Julien Thierry
When using an NMI for the PMU interrupt, taking any lock might cause a
deadlock. The current PMU overflow handler in KVM takes locks when
trying to wake up a vcpu.

When the overflow handler is called from NMI context, defer waking the
vcpu to an irq_work queue.

Signed-off-by: Julien Thierry 
Cc: Christoffer Dall 
Cc: Marc Zyngier 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: James Morse 
Cc: Suzuki K Pouloze 
Cc: kvmarm@lists.cs.columbia.edu
---
 include/kvm/arm_pmu.h |  1 +
 virt/kvm/arm/pmu.c| 25 -
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 16c769a..8202ed7 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -27,6 +27,7 @@ struct kvm_pmu {
bool ready;
bool created;
bool irq_level;
+   struct irq_work overflow_work;
 };

 #define kvm_arm_pmu_v3_ready(v)((v)->arch.pmu.ready)
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 3dd8238..deed8fb 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -421,6 +421,22 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
 }

 /**
+ * When perf interrupt is an NMI, we cannot safely notify the vcpu 
corresponding
+ * to the event.
+ * This is why we need a callback to do it once outside of the NMI context.
+ */
+static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
+{
+   struct kvm_vcpu *vcpu;
+   struct kvm_pmu *pmu;
+
+   pmu = container_of(work, struct kvm_pmu, overflow_work);
+   vcpu = kvm_pmc_to_vcpu(&pmu->pmc[0]);
+
+   kvm_vcpu_kick(vcpu);
+}
+
+/**
  * When the perf event overflows, set the overflow status and inform the vcpu.
  */
 static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
@@ -435,7 +451,11 @@ static void kvm_pmu_perf_overflow(struct perf_event 
*perf_event,

if (kvm_pmu_overflow_status(vcpu)) {
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
-   kvm_vcpu_kick(vcpu);
+
+   if (!in_nmi())
+   kvm_vcpu_kick(vcpu);
+   else
+   irq_work_queue(&vcpu->arch.pmu.overflow_work);
}
 }

@@ -706,6 +726,9 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
return ret;
}

+   init_irq_work(&vcpu->arch.pmu.overflow_work,
+ kvm_pmu_perf_overflow_notify_vcpu);
+
vcpu->arch.pmu.created = true;
return 0;
 }
--
1.9.1
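As a side note, deferring to irq_work is the standard way of getting out of
NMI context before doing anything that may take locks. A minimal, generic
sketch of the pattern used by this patch (illustrative only, not the KVM code;
all names below are made up):

	#include <linux/init.h>
	#include <linux/irq_work.h>
	#include <linux/preempt.h>
	#include <linux/printk.h>

	static struct irq_work wake_work;

	/* Runs later in IRQ context, where taking locks is fine. */
	static void do_deferred_wake(struct irq_work *work)
	{
		pr_info("deferred wakeup\n");
	}

	static void overflow_handler(void)
	{
		if (in_nmi())
			irq_work_queue(&wake_work);	/* lockless, NMI-safe */
		else
			do_deferred_wake(&wake_work);	/* normal context: do it now */
	}

	static int __init overflow_example_init(void)
	{
		init_irq_work(&wake_work, do_deferred_wake);
		return 0;
	}
	device_initcall(overflow_example_init);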


[PATCH v3 7/9] arm/arm64: kvm: pmu: Make overflow handler NMI safe

2019-07-08 Thread Julien Thierry
When using an NMI for the PMU interrupt, taking any lock might cause a
deadlock. The current PMU overflow handler in KVM takes locks when
trying to wake up a vcpu.

When the overflow handler is called from NMI context, defer waking the
vcpu to an irq_work queue.

Signed-off-by: Julien Thierry 
Cc: Christoffer Dall 
Cc: Marc Zyngier 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: James Morse 
Cc: Suzuki K Pouloze 
Cc: kvmarm@lists.cs.columbia.edu
---
 include/kvm/arm_pmu.h |  1 +
 virt/kvm/arm/pmu.c| 25 -
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 16c769a..8202ed7 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -27,6 +27,7 @@ struct kvm_pmu {
bool ready;
bool created;
bool irq_level;
+   struct irq_work overflow_work;
 };

 #define kvm_arm_pmu_v3_ready(v)((v)->arch.pmu.ready)
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 3dd8238..63f358e 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -421,6 +421,22 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
 }

 /**
+ * When perf interrupt is an NMI, we cannot safely notify the vcpu 
corresponding
+ * to the event.
+ * This is why we need a callback to do it once outside of the NMI context.
+ */
+static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
+{
+   struct kvm_vcpu *vcpu;
+   struct kvm_pmu *pmu;
+
+   pmu = container_of(work, struct kvm_pmu, overflow_work);
+   vcpu = kvm_pmc_to_vcpu(&pmu->pmc[0]);
+
+   kvm_vcpu_kick(vcpu);
+}
+
+/**
  * When the perf event overflows, set the overflow status and inform the vcpu.
  */
 static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
@@ -435,7 +451,11 @@ static void kvm_pmu_perf_overflow(struct perf_event 
*perf_event,

if (kvm_pmu_overflow_status(vcpu)) {
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
-   kvm_vcpu_kick(vcpu);
+
+   if (!in_nmi())
+   kvm_vcpu_kick(vcpu);
+   else
+   irq_work_queue(&vcpu->arch.pmu.overflow_work);
}
 }

@@ -706,6 +726,9 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
return ret;
}

+   init_irq_work(&vcpu->arch.pmu.overflow_work,
+ kvm_pmu_perf_overflow_notify_vcpu);
+
vcpu->arch.pmu.created = true;
return 0;
 }
--
1.9.1


Re: [PATCH 52/59] KVM: arm64: nv: vgic: Allow userland to set VGIC maintenance IRQ

2019-07-04 Thread Julien Thierry



On 04/07/2019 10:01, Andre Przywara wrote:
> On Thu, 4 Jul 2019 08:38:20 +0100
> Julien Thierry  wrote:
> 
>> On 21/06/2019 10:38, Marc Zyngier wrote:
>>> From: Andre Przywara 
>>>
>>> The VGIC maintenance IRQ signals various conditions about the LRs, when
>>> the GIC's virtualization extension is used.
>>> So far we didn't need it, but nested virtualization needs to know about
>>> this interrupt, so add a userland interface to setup the IRQ number.
>>> The architecture mandates that it must be a PPI, on top of that this code
>>> only exports a per-device option, so the PPI is the same on all VCPUs.
>>>
>>> Signed-off-by: Andre Przywara 
>>> [added some bits of documentation]
>>> Signed-off-by: Marc Zyngier 
>>> ---
>>>  .../virtual/kvm/devices/arm-vgic-v3.txt   |  9 
>>>  arch/arm/include/uapi/asm/kvm.h   |  1 +
>>>  arch/arm64/include/uapi/asm/kvm.h |  1 +
>>>  include/kvm/arm_vgic.h|  3 +++
>>>  virt/kvm/arm/vgic/vgic-kvm-device.c   | 22 +++
>>>  5 files changed, 36 insertions(+)
>>>
>>> diff --git a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt 
>>> b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
>>> index ff290b43c8e5..c70e8f2e0c9c 100644
>>> --- a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
>>> +++ b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
>>> @@ -249,3 +249,12 @@ Groups:
>>>Errors:
>>>  -EINVAL: vINTID is not multiple of 32 or
>>>   info field is not VGIC_LEVEL_INFO_LINE_LEVEL
>>> +
>>> +  KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ
>>> +The attr field of kvm_device_attr encodes the following values:
>>> +bits:  | 31  ....  5 | 4  ....  0 |
>>> +values:   |  RES0  |   vINTID   |
>>> +
>>> +The vINTID specifies which interrupt is generated when the vGIC
>>> +must generate a maintenance interrupt. This must be a PPI.
>>> +  
>>
>> Something seems off. The documentation suggests that the value of the
>> attribute will be between 0-15 (and other values will be masked down to
>> a value between 0 and 15).
> 
> Where does that happen? The mask is [4:0], so 5 bits, that should be enough 
> for PPIs as well.
> We could add a line to the documentation to stress that this is an interrupt 
> ID as seen by the virtual GIC, if that helps.
> 

You're right, I misread the length of the vINTID field.

Nevermind then!

Thanks,

-- 
Julien Thierry
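For completeness, the userspace side of the new attribute would presumably
look something like the snippet below. Note that KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ
is introduced by this RFC series and is not in a released uapi header, so
treat this as a sketch:

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* vgic_fd: fd of an already-created KVM_DEV_TYPE_ARM_VGIC_V3 device. */
	static int set_vgic_maint_irq(int vgic_fd, __u32 intid)
	{
		struct kvm_device_attr attr = {
			.group	= KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ,	/* from this series */
			.attr	= 0,
			.addr	= (__u64)(unsigned long)&intid,		/* must be a PPI: 16-31 */
		};

		return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
	}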


Re: [PATCH 53/59] KVM: arm64: nv: Implement maintenance interrupt forwarding

2019-07-04 Thread Julien Thierry
c_get_irq(vcpu->kvm, vcpu, vcpu->kvm->arch.vgic.maint_irq);
> + raw_spin_lock_irqsave(&irq->irq_lock, flags);
> + if (irq->line_level || irq->active)
> + irq_set_irqchip_state(kvm_vgic_global_state.maint_irq,
> +   IRQCHIP_STATE_ACTIVE, true);
> + raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
> + vgic_put_irq(vcpu->kvm, irq);
>  }
>  
>  void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
> @@ -190,11 +200,14 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
>*/
>   vgic_v3_fixup_shadow_lr_state(vcpu);
>   vgic_cpu->nested_vgic_v3 = vgic_cpu->shadow_vgic_v3;
> + irq_set_irqchip_state(kvm_vgic_global_state.maint_irq,
> +   IRQCHIP_STATE_ACTIVE, false);
>  }
>  
>  void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu)
>  {
>   struct vgic_v3_cpu_if *cpu_if = vcpu_nested_if(vcpu);
> + bool state;
>  
>   /*
>* If we exit a nested VM with a pending maintenance interrupt from the
> @@ -202,8 +215,12 @@ void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu 
> *vcpu)
>* can re-sync the appropriate LRs and sample level triggered interrupts
>* again.
>*/
> - if (vgic_state_is_nested(vcpu) &&
> - (cpu_if->vgic_hcr & ICH_HCR_EN) &&
> - vgic_v3_get_misr(vcpu))
> - kvm_inject_nested_irq(vcpu);
> + if (!vgic_state_is_nested(vcpu))
> + return;
> +
> + state  = cpu_if->vgic_hcr & ICH_HCR_EN;
> + state &= vgic_v3_get_misr(vcpu);
> +
> + kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
> + vcpu->kvm->arch.vgic.maint_irq, state, vcpu);
>  }
> 

-- 
Julien Thierry


Re: [PATCH 52/59] KVM: arm64: nv: vgic: Allow userland to set VGIC maintenance IRQ

2019-07-04 Thread Julien Thierry
EV_ARM_VGIC_GRP_MAINT_IRQ: {
> + u32 __user *uaddr = (u32 __user *)(long)attr->addr;
> + u32 val;
> +
> + if (get_user(val, uaddr))
> + return -EFAULT;
> +
> + /* Must be a PPI. */
> + if ((val >= VGIC_NR_PRIVATE_IRQS) || (val < VGIC_NR_SGIS))
> + return -EINVAL;
> +
> + dev->kvm->arch.vgic.maint_irq = val;
> +
> + return 0;
> + }
>   case KVM_DEV_ARM_VGIC_GRP_CTRL: {
>   int ret;
>  
> @@ -712,6 +733,7 @@ static int vgic_v3_has_attr(struct kvm_device *dev,
>   case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
>   return vgic_v3_has_attr_regs(dev, attr);
>   case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
> + case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
>   return 0;
>   case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
>   if (((attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >>
> 

-- 
Julien Thierry


Re: [PATCH 13/59] KVM: arm64: nv: Handle virtual EL2 registers in vcpu_read/write_sys_reg()

2019-07-03 Thread Julien Thierry



On 03/07/2019 13:15, Marc Zyngier wrote:
> On 24/06/2019 13:42, Julien Thierry wrote:
>>
>>
>> On 06/21/2019 10:37 AM, Marc Zyngier wrote:
>>> From: Andre Przywara 
>>>
>>> KVM internally uses accessor functions when reading or writing the
>>> guest's system registers. This takes care of accessing either the stored
>>> copy or using the "live" EL1 system registers when the host uses VHE.
>>>
>>> With the introduction of virtual EL2 we add a bunch of EL2 system
>>> registers, which now must also be taken care of:
>>> - If the guest is running in vEL2, and we access an EL1 sysreg, we must
>>>   revert to the stored version of that, and not use the CPU's copy.
>>> - If the guest is running in vEL1, and we access an EL2 sysreg, we must
>>>   also use the stored version, since the CPU carries the EL1 copy.
>>> - Some EL2 system registers are supposed to affect the current execution
>>>   of the system, so we need to put them into their respective EL1
>>>   counterparts. For this we need to define a mapping between the two.
>>>   This is done using the newly introduced struct el2_sysreg_map.
>>> - Some EL2 system registers have a different format than their EL1
>>>   counterpart, so we need to translate them before writing them to the
>>>   CPU. This is done using an (optional) translate function in the map.
>>> - There are the three special registers SP_EL2, SPSR_EL2 and ELR_EL2,
>>>   which need some separate handling.
>>>
>>> All of these cases are now wrapped into the existing accessor functions,
>>> so KVM users wouldn't need to care whether they access EL2 or EL1
>>> registers and also which state the guest is in.
>>>
>>> This handles what was formerly known as the "shadow state" dynamically,
>>> without requiring a separate copy for each vCPU EL.
>>>
>>> Signed-off-by: Andre Przywara 
>>> Signed-off-by: Marc Zyngier 
>>> ---
>>>  arch/arm64/include/asm/kvm_emulate.h |   6 +
>>>  arch/arm64/include/asm/kvm_host.h|   5 +
>>>  arch/arm64/kvm/sys_regs.c| 163 +++
>>>  3 files changed, 174 insertions(+)
>>>
>>> diff --git a/arch/arm64/include/asm/kvm_emulate.h 
>>> b/arch/arm64/include/asm/kvm_emulate.h
>>> index c43aac5fed69..f37006b6eec4 100644
>>> --- a/arch/arm64/include/asm/kvm_emulate.h
>>> +++ b/arch/arm64/include/asm/kvm_emulate.h
>>> @@ -70,6 +70,12 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
>>>  int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
>>>  int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
>>>  
>>> +u64 translate_tcr(u64 tcr);
>>> +u64 translate_cptr(u64 tcr);
>>> +u64 translate_sctlr(u64 tcr);
>>> +u64 translate_ttbr0(u64 tcr);
>>> +u64 translate_cnthctl(u64 tcr);
>>> +
>>>  static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
>>>  {
>>> return !(vcpu->arch.hcr_el2 & HCR_RW);
>>> diff --git a/arch/arm64/include/asm/kvm_host.h 
>>> b/arch/arm64/include/asm/kvm_host.h
>>> index 2d4290d2513a..dae9c42a7219 100644
>>> --- a/arch/arm64/include/asm/kvm_host.h
>>> +++ b/arch/arm64/include/asm/kvm_host.h
>>> @@ -217,6 +217,11 @@ enum vcpu_sysreg {
>>> NR_SYS_REGS /* Nothing after this line! */
>>>  };
>>>  
>>> +static inline bool sysreg_is_el2(int reg)
>>> +{
>>> +   return reg >= FIRST_EL2_SYSREG && reg < NR_SYS_REGS;
>>> +}
>>> +
>>>  /* 32bit mapping */
>>>  #define c0_MPIDR   (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
>>>  #define c0_CSSELR  (CSSELR_EL1 * 2)/* Cache Size Selection Register */
>>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>>> index 693dd063c9c2..d024114da162 100644
>>> --- a/arch/arm64/kvm/sys_regs.c
>>> +++ b/arch/arm64/kvm/sys_regs.c
>>> @@ -76,11 +76,142 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
>>> return false;
>>>  }
>>>  
>>> +static u64 tcr_el2_ips_to_tcr_el1_ps(u64 tcr_el2)
>>> +{
>>> +   return ((tcr_el2 & TCR_EL2_PS_MASK) >> TCR_EL2_PS_SHIFT)
>>> +   << TCR_IPS_SHIFT;
>>> +}
>>> +
>>> +u64 translate_tcr(u64 tcr)
>>> +{
>>> +   return TCR_EPD1_MASK |  /* disable TTBR1_EL1 */
>>> +  ((tcr & TCR_EL2_TBI) ? TCR_TBI0 :

Re: [PATCH 44/59] KVM: arm64: nv: Trap and emulate TLBI instructions from virtual EL2

2019-07-02 Thread Julien Thierry
> + kvm_call_hyp(__kvm_tlb_el1_instr,
> +  mmu, p->regval, sys_encoding);
> +
> + mmu = lookup_s2_mmu(vcpu->kvm, virtual_vttbr, 0);
> + if (mmu)
> + kvm_call_hyp(__kvm_tlb_el1_instr,
> +  mmu, p->regval, sys_encoding);
> + mutex_unlock(&vcpu->kvm->lock);
> +
> + return true;
> +}
> +
>  /*
>   * AT instruction emulation
>   *
> @@ -2333,12 +2506,40 @@ static struct sys_reg_desc sys_insn_descs[] = {
>   { SYS_DESC(SYS_DC_CSW), access_dcsw },
>   { SYS_DESC(SYS_DC_CISW), access_dcsw },
>  
> + SYS_INSN_TO_DESC(TLBI_VMALLE1IS, handle_tlbi_el1, forward_ttlb_traps),
> + SYS_INSN_TO_DESC(TLBI_VAE1IS, handle_tlbi_el1, forward_ttlb_traps),
> + SYS_INSN_TO_DESC(TLBI_ASIDE1IS, handle_tlbi_el1, forward_ttlb_traps),
> + SYS_INSN_TO_DESC(TLBI_VAAE1IS, handle_tlbi_el1, forward_ttlb_traps),
> + SYS_INSN_TO_DESC(TLBI_VALE1IS, handle_tlbi_el1, forward_ttlb_traps),
> + SYS_INSN_TO_DESC(TLBI_VAALE1IS, handle_tlbi_el1, forward_ttlb_traps),
> + SYS_INSN_TO_DESC(TLBI_VMALLE1, handle_tlbi_el1, forward_ttlb_traps),
> + SYS_INSN_TO_DESC(TLBI_VAE1, handle_tlbi_el1, forward_ttlb_traps),
> + SYS_INSN_TO_DESC(TLBI_ASIDE1, handle_tlbi_el1, forward_ttlb_traps),
> + SYS_INSN_TO_DESC(TLBI_VAAE1, handle_tlbi_el1, forward_ttlb_traps),
> + SYS_INSN_TO_DESC(TLBI_VALE1, handle_tlbi_el1, forward_ttlb_traps),
> + SYS_INSN_TO_DESC(TLBI_VAALE1, handle_tlbi_el1, forward_ttlb_traps),
> +
>   SYS_INSN_TO_DESC(AT_S1E2R, handle_s1e2, forward_nv_traps),
>   SYS_INSN_TO_DESC(AT_S1E2W, handle_s1e2, forward_nv_traps),
>   SYS_INSN_TO_DESC(AT_S12E1R, handle_s12r, forward_nv_traps),
>   SYS_INSN_TO_DESC(AT_S12E1W, handle_s12w, forward_nv_traps),
>   SYS_INSN_TO_DESC(AT_S12E0R, handle_s12r, forward_nv_traps),
>   SYS_INSN_TO_DESC(AT_S12E0W, handle_s12w, forward_nv_traps),
> +
> + SYS_INSN_TO_DESC(TLBI_IPAS2E1IS, handle_ipas2e1is, forward_nv_traps),
> + SYS_INSN_TO_DESC(TLBI_IPAS2LE1IS, handle_ipas2e1is, forward_nv_traps),
> + SYS_INSN_TO_DESC(TLBI_ALLE2IS, handle_alle2is, forward_nv_traps),
> + SYS_INSN_TO_DESC(TLBI_VAE2IS, handle_vae2, forward_nv_traps),
> + SYS_INSN_TO_DESC(TLBI_ALLE1IS, handle_alle1is, forward_nv_traps),
> + SYS_INSN_TO_DESC(TLBI_VALE2IS, handle_vae2, forward_nv_traps),
> + SYS_INSN_TO_DESC(TLBI_VMALLS12E1IS, handle_vmalls12e1is, 
> forward_nv_traps),
> + SYS_INSN_TO_DESC(TLBI_IPAS2E1, handle_ipas2e1is, forward_nv_traps),
> + SYS_INSN_TO_DESC(TLBI_IPAS2LE1, handle_ipas2e1is, forward_nv_traps),
> + SYS_INSN_TO_DESC(TLBI_ALLE2, handle_alle2is, forward_nv_traps),
> + SYS_INSN_TO_DESC(TLBI_VAE2, handle_vae2, forward_nv_traps),
> + SYS_INSN_TO_DESC(TLBI_ALLE1, handle_alle1is, forward_nv_traps),
> + SYS_INSN_TO_DESC(TLBI_VALE2, handle_vae2, forward_nv_traps),
> + SYS_INSN_TO_DESC(TLBI_VMALLS12E1, handle_vmalls12e1is, 
> forward_nv_traps),
>  };
>  
>  static bool trap_dbgidr(struct kvm_vcpu *vcpu,
> diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
> index 6a7cba077bce..0ea79e543b29 100644
> --- a/virt/kvm/arm/mmu.c
> +++ b/virt/kvm/arm/mmu.c
> @@ -51,7 +51,23 @@ static bool memslot_is_logging(struct kvm_memory_slot 
> *memslot)
>   */
>  void kvm_flush_remote_tlbs(struct kvm *kvm)
>  {
> - kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
> + struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> +
> + if (mmu == &kvm->arch.mmu) {
> + /*
> +  * For a normal (i.e. non-nested) guest, flush entries for the
> +  * given VMID *
> +  */
> + kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
> + } else {
> + /*
> +  * When supporting nested virtualization, we can have multiple
> +  * VMIDs in play for each VCPU in the VM, so it's really not
> +  * worth it to try to quiesce the system and flush all the
> +  * VMIDs that may be in use, instead just nuke the whole thing.
> +  */
> + kvm_call_hyp(__kvm_flush_vm_context);
> + }
>  }
>  
>  static void kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa)
> 

Cheers,

-- 
Julien Thierry


Re: [PATCH 43/59] KVM: arm64: nv: Trap and emulate AT instructions from virtual EL2

2019-07-01 Thread Julien Thierry
 +#define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
> +#define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
> +#define OP_AT_S1E2R  sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
> +#define OP_AT_S1E2W  sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
> +#define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
> +#define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
> +#define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
> +#define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
> +
>  /* Common SCTLR_ELx flags. */
>  #define SCTLR_ELx_DSSBS  (_BITUL(44))
>  #define SCTLR_ELx_ENIA   (_BITUL(31))
> diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
> index ea710f674cb6..f7af51647079 100644
> --- a/arch/arm64/kvm/hyp/Makefile
> +++ b/arch/arm64/kvm/hyp/Makefile
> @@ -19,6 +19,7 @@ obj-$(CONFIG_KVM_ARM_HOST) += entry.o
>  obj-$(CONFIG_KVM_ARM_HOST) += switch.o
>  obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
>  obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
> +obj-$(CONFIG_KVM_ARM_HOST) += at.o
>  obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
>  
>  # KVM code is run at a different exception code with a different map, so
> diff --git a/arch/arm64/kvm/hyp/at.c b/arch/arm64/kvm/hyp/at.c
> new file mode 100644
> index ..0e938b6f8e43
> --- /dev/null
> +++ b/arch/arm64/kvm/hyp/at.c
> @@ -0,0 +1,217 @@
> +/*
> + * Copyright (C) 2017 - Linaro Ltd
> + * Author: Jintack Lim 
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include 
> +#include 
> +
> +struct mmu_config {
> + u64 ttbr0;
> + u64 ttbr1;
> + u64 tcr;
> + u64 sctlr;
> + u64 vttbr;
> + u64 vtcr;
> + u64 hcr;
> +};
> +
> +static void __mmu_config_save(struct mmu_config *config)
> +{
> + config->ttbr0   = read_sysreg_el1(SYS_TTBR0);
> + config->ttbr1   = read_sysreg_el1(SYS_TTBR1);
> + config->tcr = read_sysreg_el1(SYS_TCR);
> + config->sctlr   = read_sysreg_el1(SYS_SCTLR);
> + config->vttbr   = read_sysreg(vttbr_el2);
> + config->vtcr= read_sysreg(vtcr_el2);
> + config->hcr = read_sysreg(hcr_el2);
> +}
> +
> +static void __mmu_config_restore(struct mmu_config *config)
> +{
> + write_sysreg_el1(config->ttbr0, SYS_TTBR0);
> + write_sysreg_el1(config->ttbr1, SYS_TTBR1);
> + write_sysreg_el1(config->tcr,   SYS_TCR);
> + write_sysreg_el1(config->sctlr, SYS_SCTLR);
> + write_sysreg(config->vttbr, vttbr_el2);
> + write_sysreg(config->vtcr,  vttbr_el2);

Copy-paste with terrible consequences! I guess you want to write this
one to vtcr_el2.

Actually, things still seem to run with that. It looks like that
save/restore might not be completely required.

This seems to only get called in the context of handle_exit(). At that
point I think we don't need to save the *_el2 registers. vttbr_el2 and
vtcr_el2 both get set from the vcpu content in __activate_vm() before
jumping to EL1 (or vEL2), and hcr_el2 gets set in the same manner in
__activate_traps().

I think the *_el1 regs still need the save restore as we don't hit
vcpu_load() before re-running the guest after a successful handle_exit().

So unless we plan to call the "at" emulation code within
kvm_vcpu_run_vhe(), it should be safe to drop the hcr/vttbr/vtcr from
the mmu_config.

> + write_sysreg(config->hcr,   hcr_el2);
> +
> + isb();
> +}
> +

Cheers,

-- 
Julien Thierry
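If the hcr/vttbr/vtcr fields were dropped as suggested above, the
save/restore pair would shrink to something along these lines (a sketch
derived from the quoted code, untested):

	struct mmu_config {
		u64 ttbr0;
		u64 ttbr1;
		u64 tcr;
		u64 sctlr;
	};

	static void __mmu_config_save(struct mmu_config *config)
	{
		config->ttbr0	= read_sysreg_el1(SYS_TTBR0);
		config->ttbr1	= read_sysreg_el1(SYS_TTBR1);
		config->tcr	= read_sysreg_el1(SYS_TCR);
		config->sctlr	= read_sysreg_el1(SYS_SCTLR);
	}

	static void __mmu_config_restore(struct mmu_config *config)
	{
		write_sysreg_el1(config->ttbr0,	SYS_TTBR0);
		write_sysreg_el1(config->ttbr1,	SYS_TTBR1);
		write_sysreg_el1(config->tcr,	SYS_TCR);
		write_sysreg_el1(config->sctlr,	SYS_SCTLR);

		isb();
	}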


Re: [PATCH 39/59] KVM: arm64: nv: Move last_vcpu_ran to be per s2 mmu

2019-07-01 Thread Julien Thierry
 *per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;

Nit: I'd suggest putting that right after the allocation of last_vcpu_ran.

> +
>   kvm_init_s2_mmu(mmu);

Hmm, now we have kvm_init_stage2_mmu() and an arch (arm or arm64)
specific kvm_init_s2_mmu()...

If we want to keep the s2 mmu structure different for arm and arm64, I'd
suggest at least renaming kvm_init_s2_mmu() so the distinction with
kvm_init_stage2_mmu() is clearer.

>  
>   return 0;
> @@ -1021,8 +1033,10 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
>   spin_unlock(&kvm->mmu_lock);
>  
>   /* Free the HW pgd, one page at a time */
> - if (pgd)
> + if (pgd) {
>   free_pages_exact(pgd, stage2_pgd_size(kvm));
> + free_percpu(mmu->last_vcpu_ran);
> + }
>  }
>  
>  static pud_t *stage2_get_pud(struct kvm_s2_mmu *mmu, struct 
> kvm_mmu_memory_cache *cache,
> 

Cheers,

-- 
Julien Thierry


Re: [PATCH 38/59] KVM: arm64: nv: Unmap/flush shadow stage 2 page tables

2019-07-01 Thread Julien Thierry
3cbfa1ecbe..bcca27d5c481 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -1005,8 +1005,10 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct 
> kvm_vcpu *vcpu,
>* Ensure a rebooted VM will fault in RAM pages and detect if the
>* guest MMU is turned off and flush the caches as needed.
>*/
> - if (vcpu->arch.has_run_once)
> + if (vcpu->arch.has_run_once) {
>   stage2_unmap_vm(vcpu->kvm);
> + kvm_nested_s2_clear(vcpu->kvm);

The comment above kvm_nested_s2_clear() states that kvm->mmu_lock needs
to be taken, but at this point it isn't held (stage2_unmap_vm() acquires
the lock and releases it).
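One way to address it would be along these lines (a sketch of the call site
with the lock taken explicitly around the new call; untested):

	if (vcpu->arch.has_run_once) {
		stage2_unmap_vm(vcpu->kvm);

		/* kvm_nested_s2_clear() expects kvm->mmu_lock to be held */
		spin_lock(&vcpu->kvm->mmu_lock);
		kvm_nested_s2_clear(vcpu->kvm);
		spin_unlock(&vcpu->kvm->mmu_lock);
	}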

Cheers,

-- 
Julien Thierry


Re: [PATCH 35/59] KVM: arm/arm64: nv: Support multiple nested stage 2 mmu structures

2019-06-27 Thread Julien Thierry
; +
> + if (atomic_read(&s2_mmu->refcnt) == 0)
> + break;
> + }
> + BUG_ON(atomic_read(&s2_mmu->refcnt)); /* We have struct MMUs to spare */
> +
> + if (kvm_s2_mmu_valid(s2_mmu)) {
> + /* Clear the old state */
> + kvm_unmap_stage2_range(s2_mmu, 0, kvm_phys_size(kvm));
> + if (s2_mmu->vmid.vmid_gen)
> + kvm_call_hyp(__kvm_tlb_flush_vmid, s2_mmu);
> + }
> +
> + /*
> +  * The virtual VMID (modulo CnP) will be used as a key when matching
> +  * an existing kvm_s2_mmu.
> +  */
> + s2_mmu->vttbr = vttbr & ~1UL;
> + s2_mmu->nested_stage2_enabled = hcr & HCR_VM;
> +
> +out:
> + atomic_inc(&s2_mmu->refcnt);
> + return s2_mmu;
> +}
> +
> +void kvm_init_s2_mmu(struct kvm_s2_mmu *mmu)
> +{
> + mmu->vttbr = 1;
> + mmu->nested_stage2_enabled = false;
> + atomic_set(&mmu->refcnt, 0);
> +}
> +
> +void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
> +{
> + if (is_hyp_ctxt(vcpu)) {
> + vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
> + } else {
> + spin_lock(&vcpu->kvm->mmu_lock);

For the allocation + initialization of s2 mmus, kvm->lock is taken in
kvm_vcpu_init_nested(). But here we take kvm->mmu_lock.

Are we in trouble? Or are we expecting
get_s2_mmu_nested()/lookup_s2_mmu() to be called only after
kvm_vcpu_init_nested() has completed on all vcpus of the VM? Otherwise
we could end up using the kvm->arch.nested_mmus when it has been freed
and before it is updated with the new pointer.

(I feel we should be taking kvm->mmu_lock in kvm_vcpu_init_nested() )

> + vcpu->arch.hw_mmu = get_s2_mmu_nested(vcpu);
> + spin_unlock(&vcpu->kvm->mmu_lock);
> + }
> +}
> +
> +void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu)
> +{
> + if (vcpu->arch.hw_mmu != &vcpu->kvm->arch.mmu) {
> + atomic_dec(&vcpu->arch.hw_mmu->refcnt);
> + vcpu->arch.hw_mmu = NULL;
> + }
> +}
>  
>  /*
>   * Inject wfx to the virtual EL2 if this is not from the virtual EL2 and
> @@ -37,3 +191,21 @@ int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe)
>  
>   return -EINVAL;
>  }
> +
> +void kvm_arch_flush_shadow_all(struct kvm *kvm)
> +{
> + int i;
> +
> + for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
> + struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
> +
> + WARN_ON(atomic_read(&mmu->refcnt));
> +
> + if (!atomic_read(&mmu->refcnt))
> + kvm_free_stage2_pgd(mmu);
> + }
> + kfree(kvm->arch.nested_mmus);
> + kvm->arch.nested_mmus = NULL;
> + kvm->arch.nested_mmus_size = 0;

Don't we also need to take the lock before modifying those? (Apparently
we're killing the VM, so there shouldn't be any other users, but I just
want to make sure...)

Cheers,

-- 
Julien Thierry


Re: [PATCH 28/59] KVM: arm64: nv: Respect the virtual HCR_EL2.NV1 bit setting

2019-06-26 Thread Julien Thierry



On 06/21/2019 10:38 AM, Marc Zyngier wrote:
> From: Jintack Lim 
> 
> Forward ELR_EL1, SPSR_EL1 and VBAR_EL1 traps to the virtual EL2 if the
> virtual HCR_EL2.NV bit is set.
> 
> This is for recursive nested virtualization.
> 
> Signed-off-by: Jintack Lim 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm64/include/asm/kvm_arm.h |  1 +
>  arch/arm64/kvm/sys_regs.c| 19 +--
>  2 files changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_arm.h 
> b/arch/arm64/include/asm/kvm_arm.h
> index d21486274eeb..55f4525c112c 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -24,6 +24,7 @@
>  
>  /* Hyp Configuration Register (HCR) bits */
>  #define HCR_FWB  (UL(1) << 46)
> +#define HCR_NV1  (UL(1) << 43)
>  #define HCR_NV   (UL(1) << 42)
>  #define HCR_API  (UL(1) << 41)
>  #define HCR_APK  (UL(1) << 40)
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 0f74b9277a86..beadebcfc888 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -473,8 +473,10 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
>   if (el12_reg(p) && forward_nv_traps(vcpu))
>   return false;
>  
> - if (!el12_reg(p) && forward_vm_traps(vcpu, p))
> - return kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu));
> + if (!el12_reg(p) && forward_vm_traps(vcpu, p)) {
> + kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu));
> + return false;

I feel like this change is actually intended to be part of the previous
patch.

Cheers,

Julien


Re: [PATCH 27/59] KVM: arm64: nv: Respect virtual HCR_EL2.TVM and TRVM settings

2019-06-26 Thread Julien Thierry



On 06/21/2019 10:38 AM, Marc Zyngier wrote:
> From: Jintack Lim 
> 
> Forward the EL1 virtual memory register traps to the virtual EL2 if they
> are not coming from the virtual EL2 and the virtual HCR_EL2.TVM or TRVM
> bit is set.
> 
> This is for recursive nested virtualization.
> 
> Signed-off-by: Jintack Lim 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm64/kvm/sys_regs.c | 24 
>  1 file changed, 24 insertions(+)
> 
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 582d62aa48b7..0f74b9277a86 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -436,6 +436,27 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
>   return true;
>  }
>  
> +/* This function is to support the recursive nested virtualization */
> +static bool forward_vm_traps(struct kvm_vcpu *vcpu, struct sys_reg_params *p)
> +{
> + u64 hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2);
> +
> + /* If a trap comes from the virtual EL2, the host hypervisor handles. */
> + if (vcpu_mode_el2(vcpu))
> + return false;
> +
> + /*
> +  * If the virtual HCR_EL2.TVM or TRVM bit is set, we need to forward
> +  * this trap to the virtual EL2.
> +  */
> + if ((hcr_el2 & HCR_TVM) && p->is_write)
> + return true;
> + else if ((hcr_el2 & HCR_TRVM) && !p->is_write)
> + return true;
> +
> + return false;
> +}
> +
>  /*
>   * Generic accessor for VM registers. Only called as long as HCR_TVM
>   * is set. If the guest enables the MMU, we stop trapping the VM
> @@ -452,6 +473,9 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
>   if (el12_reg(p) && forward_nv_traps(vcpu))
>   return false;
>  
> + if (!el12_reg(p) && forward_vm_traps(vcpu, p))
> + return kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu));

Since we already have forward_traps(), isn't this just:

	if (!el12_reg(p) && forward_traps(vcpu, p->is_write ? HCR_TVM : HCR_TRVM))
		return true;

We could maybe simplify forward_vm_traps() to just call forward_traps()
similar to forward_nv_traps().
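i.e. something like the following (a sketch, untested):

	/* Mirror forward_nv_traps() and let forward_traps() do the mode
	 * and virtual HCR_EL2 bit checks. */
	static bool forward_vm_traps(struct kvm_vcpu *vcpu, struct sys_reg_params *p)
	{
		return forward_traps(vcpu, p->is_write ? HCR_TVM : HCR_TRVM);
	}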

Cheers,

Julien


Re: [PATCH 26/59] KVM: arm64: nv: Respect the virtual HCR_EL2.NV bit setting

2019-06-25 Thread Julien Thierry



On 06/21/2019 10:38 AM, Marc Zyngier wrote:
> From: Jintack Lim 
> 
> Forward traps due to HCR_EL2.NV bit to the virtual EL2 if they are not
> coming from the virtual EL2 and the virtual HCR_EL2.NV bit is set.
> 
> In addition to EL2 register accesses, setting NV bit will also make EL12
> register accesses trap to EL2. To emulate this for the virtual EL2,
> forword traps due to EL12 register accessses to the virtual EL2 if the
> virtual HCR_EL2.NV bit is set.
> 
> This is for recursive nested virtualization.
> 
> Signed-off-by: Jintack Lim 
> [Moved code to emulate-nested.c]
> Signed-off-by: Christoffer Dall 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm64/include/asm/kvm_arm.h|  1 +
>  arch/arm64/include/asm/kvm_nested.h |  2 ++
>  arch/arm64/kvm/emulate-nested.c | 28 
>  arch/arm64/kvm/handle_exit.c|  6 ++
>  arch/arm64/kvm/sys_regs.c   | 18 ++
>  5 files changed, 55 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/kvm_arm.h 
> b/arch/arm64/include/asm/kvm_arm.h
> index 48e15af2bece..d21486274eeb 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -24,6 +24,7 @@
>  
>  /* Hyp Configuration Register (HCR) bits */
>  #define HCR_FWB  (UL(1) << 46)
> +#define HCR_NV   (UL(1) << 42)
>  #define HCR_API  (UL(1) << 41)
>  #define HCR_APK  (UL(1) << 40)
>  #define HCR_TEA  (UL(1) << 37)
> diff --git a/arch/arm64/include/asm/kvm_nested.h 
> b/arch/arm64/include/asm/kvm_nested.h
> index 645e5e11b749..61e71d0d2151 100644
> --- a/arch/arm64/include/asm/kvm_nested.h
> +++ b/arch/arm64/include/asm/kvm_nested.h
> @@ -11,5 +11,7 @@ static inline bool nested_virt_in_use(const struct kvm_vcpu 
> *vcpu)
>  }
>  
>  int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe);
> +extern bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit);
> +extern bool forward_nv_traps(struct kvm_vcpu *vcpu);
>  
>  #endif /* __ARM64_KVM_NESTED_H */
> diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
> index f829b8b04dc8..c406fd688b9f 100644
> --- a/arch/arm64/kvm/emulate-nested.c
> +++ b/arch/arm64/kvm/emulate-nested.c
> @@ -24,6 +24,27 @@
>  
>  #include "trace.h"
>  
> +bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit)

Should this one be static?

> +{
> + bool control_bit_set;
> +
> + if (!nested_virt_in_use(vcpu))
> + return false;
> +
> + control_bit_set = __vcpu_sys_reg(vcpu, HCR_EL2) & control_bit;
> + if (!vcpu_mode_el2(vcpu) && control_bit_set) {
> + kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu));
> + return true;
> + }
> + return false;
> +}
> +
> +bool forward_nv_traps(struct kvm_vcpu *vcpu)
> +{
> + return forward_traps(vcpu, HCR_NV);
> +}
> +
> +
>  /* This is borrowed from get_except_vector in inject_fault.c */
>  static u64 get_el2_except_vector(struct kvm_vcpu *vcpu,
>   enum exception_type type)
> @@ -55,6 +76,13 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
>   u64 spsr, elr, mode;
>   bool direct_eret;
>  
> + /*
> +  * Forward this trap to the virtual EL2 if the virtual
> +  * HCR_EL2.NV bit is set and this is coming from !EL2.
> +  */
> + if (forward_nv_traps(vcpu))
> + return;
> +
>   /*
>* Going through the whole put/load motions is a waste of time
>* if this is a VHE guest hypervisor returning to its own
> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
> index 39602a4c1d61..7e8b1ec1d251 100644
> --- a/arch/arm64/kvm/handle_exit.c
> +++ b/arch/arm64/kvm/handle_exit.c
> @@ -72,6 +72,12 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct 
> kvm_run *run)
>  {
>   int ret;
>  
> + /*
> +  * Forward this trapped smc instruction to the virtual EL2.
> +  */
> + if ((vcpu_read_sys_reg(vcpu, HCR_EL2) & HCR_TSC) && 
> forward_nv_traps(vcpu))

Not sure I understand why this would be only when the guest hyp also has
NV set.

If the guest hyp requested to trap SMC instructions and we received one
while in vEL1, shouldn't we always forward it to the guest hyp and let
it implement the SMC response the way it wants?
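In other words, something like the following seems more natural (a sketch
reusing forward_traps() from the earlier patch, untested):

	/* Forward the SMC trap whenever the guest hypervisor asked for it
	 * via its virtual HCR_EL2.TSC, regardless of HCR_EL2.NV. */
	if (forward_traps(vcpu, HCR_TSC))
		return 1;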

Cheers,

Julien


Re: [PATCH 23/59] KVM: arm64: nv: Respect virtual HCR_EL2.TWX setting

2019-06-25 Thread Julien Thierry



On 06/21/2019 10:38 AM, Marc Zyngier wrote:
> From: Jintack Lim 
> 
> Forward exceptions due to WFI or WFE instructions to the virtual EL2 if
> they are not coming from the virtual EL2 and virtual HCR_EL2.TWX is set.
> 
> Signed-off-by: Jintack Lim 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm64/include/asm/kvm_nested.h |  2 ++
>  arch/arm64/kvm/Makefile |  1 +
>  arch/arm64/kvm/handle_exit.c| 13 +-
>  arch/arm64/kvm/nested.c | 39 +
>  4 files changed, 54 insertions(+), 1 deletion(-)
>  create mode 100644 arch/arm64/kvm/nested.c
> 
> diff --git a/arch/arm64/include/asm/kvm_nested.h 
> b/arch/arm64/include/asm/kvm_nested.h
> index 8a3d121a0b42..645e5e11b749 100644
> --- a/arch/arm64/include/asm/kvm_nested.h
> +++ b/arch/arm64/include/asm/kvm_nested.h
> @@ -10,4 +10,6 @@ static inline bool nested_virt_in_use(const struct kvm_vcpu 
> *vcpu)
>   test_bit(KVM_ARM_VCPU_NESTED_VIRT, vcpu->arch.features);
>  }
>  
> +int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe);
> +
>  #endif /* __ARM64_KVM_NESTED_H */
> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> index 9e450aea7db6..f11bd8b0d837 100644
> --- a/arch/arm64/kvm/Makefile
> +++ b/arch/arm64/kvm/Makefile
> @@ -36,4 +36,5 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
>  kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
>  
> +kvm-$(CONFIG_KVM_ARM_HOST) += nested.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += emulate-nested.o
> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
> index e348c15c81bc..ddba212fd6ec 100644
> --- a/arch/arm64/kvm/handle_exit.c
> +++ b/arch/arm64/kvm/handle_exit.c
> @@ -127,7 +127,18 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu, 
> struct kvm_run *run)
>   */
>  static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  {
> - if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
> + bool is_wfe = !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE);
> +
> + if (nested_virt_in_use(vcpu)) {
> + int ret = handle_wfx_nested(vcpu, is_wfe);
> +
> + if (ret < 0 && ret != -EINVAL)
> + return ret;
> + else if (ret >= 0)
> + return ret;

I think you can simplify this:

if (ret != -EINVAL)
return ret;

Cheers,

Julien


> + }
> +
> + if (is_wfe) {
>   trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
>   vcpu->stat.wfe_exit_stat++;
>   kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
> diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
> new file mode 100644
> index ..3872e3cf1691
> --- /dev/null
> +++ b/arch/arm64/kvm/nested.c
> @@ -0,0 +1,39 @@
> +/*
> + * Copyright (C) 2017 - Columbia University and Linaro Ltd.
> + * Author: Jintack Lim 
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include 
> +#include 
> +
> +#include 
> +
> +/*
> + * Inject wfx to the virtual EL2 if this is not from the virtual EL2 and
> + * the virtual HCR_EL2.TWX is set. Otherwise, let the host hypervisor
> + * handle this.
> + */
> +int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe)
> +{
> + u64 hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2);
> +
> + if (vcpu_mode_el2(vcpu))
> + return -EINVAL;
> +
> + if ((is_wfe && (hcr_el2 & HCR_TWE)) || (!is_wfe && (hcr_el2 & HCR_TWI)))
> + return kvm_inject_nested_sync(vcpu, kvm_vcpu_get_hsr(vcpu));
> +
> + return -EINVAL;
> +}
> 


Re: [PATCH 21/59] KVM: arm64: nv: Set a handler for the system instruction traps

2019-06-25 Thread Julien Thierry



On 06/21/2019 10:38 AM, Marc Zyngier wrote:
> From: Jintack Lim 
> 
> When HCR.NV bit is set, execution of the EL2 translation regime address
> aranslation instructions and TLB maintenance instructions are trapped to
> EL2. In addition, execution of the EL1 translation regime address
> aranslation instructions and TLB maintenance instructions that are only

What's "translation regime address aranslation"? I would guess
"aranslation" should be removed, but since the same pattern appears
twice in the commit, doubt took over me :).

> accessible from EL2 and above are trapped to EL2. In these cases,
> ESR_EL2.EC will be set to 0x18.
> 
> Change the existing handler to handle those system instructions as well
> as MRS/MSR instructions.  Emulation of each system instructions will be
> done in separate patches.
> 
> Signed-off-by: Jintack Lim 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm64/include/asm/kvm_coproc.h |  2 +-
>  arch/arm64/kvm/handle_exit.c|  2 +-
>  arch/arm64/kvm/sys_regs.c   | 53 +
>  arch/arm64/kvm/trace.h  |  2 +-
>  4 files changed, 50 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_coproc.h 
> b/arch/arm64/include/asm/kvm_coproc.h
> index 0b52377a6c11..1b3d21bd8adb 100644
> --- a/arch/arm64/include/asm/kvm_coproc.h
> +++ b/arch/arm64/include/asm/kvm_coproc.h
> @@ -43,7 +43,7 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct 
> kvm_run *run);
>  int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
>  int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
>  int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
> -int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
> +int kvm_handle_sys(struct kvm_vcpu *vcpu, struct kvm_run *run);
>  
>  #define kvm_coproc_table_init kvm_sys_reg_table_init
>  void kvm_sys_reg_table_init(void);
> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
> index 2517711f034f..e662f23b63a1 100644
> --- a/arch/arm64/kvm/handle_exit.c
> +++ b/arch/arm64/kvm/handle_exit.c
> @@ -236,7 +236,7 @@ static exit_handle_fn arm_exit_handlers[] = {
>   [ESR_ELx_EC_SMC32]  = handle_smc,
>   [ESR_ELx_EC_HVC64]  = handle_hvc,
>   [ESR_ELx_EC_SMC64]  = handle_smc,
> - [ESR_ELx_EC_SYS64]  = kvm_handle_sys_reg,
> + [ESR_ELx_EC_SYS64]  = kvm_handle_sys,
>   [ESR_ELx_EC_SVE]= handle_sve,
>   [ESR_ELx_EC_ERET]   = kvm_handle_eret,
>   [ESR_ELx_EC_IABT_LOW]   = kvm_handle_guest_abort,
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 1d1312425cf2..e711dde4511c 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -2597,6 +2597,40 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
>   return 1;
>  }
>  
> +static int emulate_tlbi(struct kvm_vcpu *vcpu,
> +  struct sys_reg_params *params)
> +{
> + /* TODO: support tlbi instruction emulation*/
> + kvm_inject_undefined(vcpu);
> + return 1;
> +}
> +
> +static int emulate_at(struct kvm_vcpu *vcpu,
> +  struct sys_reg_params *params)
> +{
> + /* TODO: support address translation instruction emulation */
> + kvm_inject_undefined(vcpu);
> + return 1;
> +}
> +
> +static int emulate_sys_instr(struct kvm_vcpu *vcpu,
> +  struct sys_reg_params *params)
> +{
> + int ret = 0;
> +
> + /* TLB maintenance instructions*/
> + if (params->CRn == 0b1000)
> + ret = emulate_tlbi(vcpu, params);
> + /* Address Translation instructions */
> + else if (params->CRn == 0b0111 && params->CRm == 0b1000)
> + ret = emulate_at(vcpu, params);
> +

So, in theory the NV bit shouldn't trap other Op0 == 1 instructions.
Would it be worth adding a WARN() or BUG() in an "else" branch here,
just in case?
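Something along these lines, for instance (a sketch of the dispatcher with a
defensive branch added; untested):

	static int emulate_sys_instr(struct kvm_vcpu *vcpu, struct sys_reg_params *p)
	{
		/* TLB maintenance instructions */
		if (p->CRn == 0b1000)
			return emulate_tlbi(vcpu, p);

		/* Address Translation instructions */
		if (p->CRn == 0b0111 && p->CRm == 0b1000)
			return emulate_at(vcpu, p);

		/* Nothing else is expected to trap here with only HCR_EL2.NV set */
		WARN_ONCE(1, "Unhandled Op0==1 trap: CRn=%u CRm=%u\n", p->CRn, p->CRm);
		kvm_inject_undefined(vcpu);
		return 1;
	}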

Thanks,

-- 
Julien Thierry


Re: [PATCH 16/59] KVM: arm64: nv: Save/Restore vEL2 sysregs

2019-06-25 Thread Julien Thierry



On 06/21/2019 10:38 AM, Marc Zyngier wrote:
> From: Andre Przywara 
> 
> Whenever we need to restore the guest's system registers to the CPU, we
> now need to take care of the EL2 system registers as well. Most of them
> are accessed via traps only, but some have an immediate effect and also
> a guest running in VHE mode would expect them to be accessible via their
> EL1 encoding, which we do not trap.
> 
> Split the current __sysreg_{save,restore}_el1_state() functions into
> handling common sysregs, then differentiate between the guest running in
> vEL2 and vEL1.
> 
> For vEL2 we write the virtual EL2 registers with an identical format directly
> into their EL1 counterpart, and translate the few registers that have a
> different format for the same effect on the execution when running a
> non-VHE guest hypervisor.
> 
>   [ Commit message reworked and many bug fixes applied by Marc Zyngier
> and Christoffer Dall. ]
> 
> Signed-off-by: Andre Przywara 
> Signed-off-by: Marc Zyngier 
> Signed-off-by: Christoffer Dall 
> ---
>  arch/arm64/kvm/hyp/sysreg-sr.c | 160 +++--
>  1 file changed, 153 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
> index 62866a68e852..2abb9c3ff24f 100644
> --- a/arch/arm64/kvm/hyp/sysreg-sr.c
> +++ b/arch/arm64/kvm/hyp/sysreg-sr.c

[...]

> @@ -124,10 +167,91 @@ static void __hyp_text 
> __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
>   write_sysreg(ctxt->sys_regs[TPIDRRO_EL0],   tpidrro_el0);
>  }
>  
> -static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context 
> *ctxt)
> +static void __sysreg_restore_vel2_state(struct kvm_cpu_context *ctxt)
>  {
> + u64 val;
> +
> + write_sysreg(read_cpuid_id(),   vpidr_el2);
>   write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
> - write_sysreg(ctxt->sys_regs[CSSELR_EL1],csselr_el1);
> + write_sysreg_el1(ctxt->sys_regs[MAIR_EL2],  SYS_MAIR);
> + write_sysreg_el1(ctxt->sys_regs[VBAR_EL2],  SYS_VBAR);
> + write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL2],SYS_CONTEXTIDR);
> + write_sysreg_el1(ctxt->sys_regs[AMAIR_EL2], SYS_AMAIR);
> +
> + if (__vcpu_el2_e2h_is_set(ctxt)) {
> + /*
> +  * In VHE mode those registers are compatible between
> +  * EL1 and EL2.
> +  */
> + write_sysreg_el1(ctxt->sys_regs[SCTLR_EL2], SYS_SCTLR);
> + write_sysreg_el1(ctxt->sys_regs[CPTR_EL2],  SYS_CPACR);
> + write_sysreg_el1(ctxt->sys_regs[TTBR0_EL2], SYS_TTBR0);
> + write_sysreg_el1(ctxt->sys_regs[TTBR1_EL2], SYS_TTBR1);
> + write_sysreg_el1(ctxt->sys_regs[TCR_EL2],   SYS_TCR);
> + write_sysreg_el1(ctxt->sys_regs[CNTHCTL_EL2],   SYS_CNTKCTL);
> + } else {
> + write_sysreg_el1(translate_sctlr(ctxt->sys_regs[SCTLR_EL2]),
> +  SYS_SCTLR);
> + write_sysreg_el1(translate_cptr(ctxt->sys_regs[CPTR_EL2]),
> +  SYS_CPACR);
> + write_sysreg_el1(translate_ttbr0(ctxt->sys_regs[TTBR0_EL2]),
> +  SYS_TTBR0);
> + write_sysreg_el1(translate_tcr(ctxt->sys_regs[TCR_EL2]),
> +  SYS_TCR);
> + write_sysreg_el1(translate_cnthctl(ctxt->sys_regs[CNTHCTL_EL2]),
> +  SYS_CNTKCTL);
> + }
> +
> + /*
> +  * These registers can be modified behind our back by a fault
> +  * taken inside vEL2. Save them, always.
> +  */
> + write_sysreg_el1(ctxt->sys_regs[ESR_EL2],   SYS_ESR);
> + write_sysreg_el1(ctxt->sys_regs[AFSR0_EL2], SYS_AFSR0);
> + write_sysreg_el1(ctxt->sys_regs[AFSR1_EL2], SYS_AFSR1);
> + write_sysreg_el1(ctxt->sys_regs[FAR_EL2],   SYS_FAR);
> + write_sysreg(ctxt->sys_regs[SP_EL2],sp_el1);
> + write_sysreg_el1(ctxt->sys_regs[ELR_EL2],   SYS_ELR);
> +
> + val = __fixup_spsr_el2_write(ctxt, ctxt->sys_regs[SPSR_EL2]);
> + write_sysreg_el1(val,   SYS_SPSR);
> +}
> +
> +static void __hyp_text __sysreg_restore_vel1_state(struct kvm_cpu_context 
> *ctxt)
> +{
> + u64 mpidr;
> +
> + if (has_vhe()) {
> + struct kvm_vcpu *vcpu;
> +
> + /*
> +  * Warning: this hack only works on VHE, because we only
> +  * call this with the *guest* context, which is part of
> +  * struct kvm_vcpu. On a host context, you'd get pure junk.
> +  */
> + vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);

This seems very fragile, just to find out whether the guest has hyp
capabilities. It would at least be nice to make sure this is indeed a
guest context.

The *clean* way to do it could be to have a pointer to kvm_vcpu in the
kvm_cpu_context which would be NULL for host contexts.
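
A rough sketch of what I mean (purely illustrative; the field and helper
names here are made up, not from the series):

    struct kvm_cpu_context {
            /* ... existing fields ... */
            struct kvm_vcpu *vcpu;  /* NULL for host contexts */
    };

    static inline bool ctxt_is_guest_ctxt(struct kvm_cpu_context *ctxt)
    {
            return ctxt->vcpu != NULL;
    }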

Otherwise, 

Re: [PATCH 15/59] KVM: arm64: nv: Refactor vcpu_{read,write}_sys_reg

2019-06-24 Thread Julien Thierry
ase TPIDRRO_EL0:   write_sysreg_s(val, SYS_TPIDRRO_EL0);   break;
> + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break;
> + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12);break;
> + case CNTKCTL_EL1:   write_sysreg_s(val, SYS_CNTKCTL_EL12);  break;
> + case PAR_EL1:   write_sysreg_s(val, SYS_PAR_EL1);   break;
> + case DACR32_EL2:write_sysreg_s(val, SYS_DACR32_EL2);break;
> + case IFSR32_EL2:write_sysreg_s(val, SYS_IFSR32_EL2);break;
> + case DBGVCR32_EL2:  write_sysreg_s(val, SYS_DBGVCR32_EL2);  break;
> + default:return false;
> + }
> +
> + return true;
> +}
> +
>  u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
>  {
> - u64 val;
> + u64 val = 0x8badf00d8badf00d;
>  
>   if (!vcpu->arch.sysregs_loaded_on_cpu)
> - goto immediate_read;
> + goto memory_read;
>  
>   if (unlikely(sysreg_is_el2(reg))) {
>   const struct el2_sysreg_map *el2_reg;
>  
>   if (!is_hyp_ctxt(vcpu))
> - goto immediate_read;
> + goto memory_read;
>  
>   switch (reg) {
> + case ELR_EL2:
> + return read_sysreg_el1(SYS_ELR);

Hmmm, this change feels a bit out of place.

Also, patch 13 added ELR_EL2 and SP_EL2 to the switch cases for physical
sysreg accesses. Now ELR_EL2 is moved out of the main switch cases and
SP_EL2 is completely omitted.

I'd say either patch 13 needs to be reworked, or a separate patch should
be extracted from this one to provide an intermediate state, or the
commit message on this patch should be more detailed.

Cheers,

-- 
Julien Thierry


Re: [PATCH 13/59] KVM: arm64: nv: Handle virtual EL2 registers in vcpu_read/write_sys_reg()

2019-06-24 Thread Julien Thierry
:return read_sysreg(sp_el1);
> + case ELR_EL2:   return read_sysreg_el1(SYS_ELR);
>   }
>  
>  immediate_read:
> @@ -125,6 +258,34 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, 
> int reg)
>   if (!vcpu->arch.sysregs_loaded_on_cpu)
>   goto immediate_write;
>  
> + if (unlikely(sysreg_is_el2(reg))) {
> + const struct el2_sysreg_map *el2_reg;
> +
> + if (!is_hyp_ctxt(vcpu))
> + goto immediate_write;
> +
> + /* Store the EL2 version in the sysregs array. */
> + __vcpu_sys_reg(vcpu, reg) = val;
> +
> + el2_reg = find_el2_sysreg(nested_sysreg_map, reg);
> + if (el2_reg) {
> + /* Does this register have an EL1 counterpart? */
> + if (el2_reg->mapping == __INVALID_SYSREG__)
> + return;

As in the read case, this is never reached and we'll go through the
switch case.

Cheers,

-- 
Julien Thierry


Re: [PATCH 06/59] KVM: arm64: nv: Allow userspace to set PSR_MODE_EL2x

2019-06-21 Thread Julien Thierry



On 21/06/2019 10:37, Marc Zyngier wrote:
> From: Christoffer Dall 
> 
> We were not allowing userspace to set a more privileged mode for the VCPU
> than EL1, but we should allow this when nested virtualization is enabled
> for the VCPU.
> 
> Signed-off-by: Christoffer Dall 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm64/kvm/guest.c | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
> index 3ae2f82fca46..4c35b5d51e21 100644
> --- a/arch/arm64/kvm/guest.c
> +++ b/arch/arm64/kvm/guest.c
> @@ -37,6 +37,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include "trace.h"
> @@ -194,6 +195,11 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const 
> struct kvm_one_reg *reg)
>   if (vcpu_el1_is_32bit(vcpu))
>   return -EINVAL;
>   break;
> + case PSR_MODE_EL2h:
> + case PSR_MODE_EL2t:
> + if (vcpu_el1_is_32bit(vcpu) || 
> !nested_virt_in_use(vcpu))

This condition reads a bit oddly. Why do we care about anything other
than !nested_virt_in_use()?

If nested virt is not in use then obviously we return the error.

If nested virt is in use, then why do we care about EL1? Or should this
test read as "highest_el_is_32bit"?
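
To make the question concrete, the simplification I have in mind would
look something like this (illustrative sketch only, not a tested change):

    case PSR_MODE_EL2h:
    case PSR_MODE_EL2t:
            if (!nested_virt_in_use(vcpu))
                    return -EINVAL;
            break;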

Thanks,

-- 
Julien Thierry


Re: [PATCH 04/59] KVM: arm64: nv: Introduce nested virtualization VCPU feature

2019-06-21 Thread Julien Thierry



On 21/06/2019 10:37, Marc Zyngier wrote:
> From: Christoffer Dall 
> 
> Introduce the feature bit and a primitive that checks if the feature is
> set behind a static key check based on the cpus_have_const_cap check.
> 
> Checking nested_virt_in_use() on systems without nested virt enabled
> should have negligible overhead.
> 
> We don't yet allow userspace to actually set this feature.
> 
> Signed-off-by: Christoffer Dall 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm/include/asm/kvm_nested.h   |  9 +
>  arch/arm64/include/asm/kvm_nested.h | 13 +
>  arch/arm64/include/uapi/asm/kvm.h   |  1 +
>  3 files changed, 23 insertions(+)
>  create mode 100644 arch/arm/include/asm/kvm_nested.h
>  create mode 100644 arch/arm64/include/asm/kvm_nested.h
> 
> diff --git a/arch/arm/include/asm/kvm_nested.h 
> b/arch/arm/include/asm/kvm_nested.h
> new file mode 100644
> index ..124ff6445f8f
> --- /dev/null
> +++ b/arch/arm/include/asm/kvm_nested.h
> @@ -0,0 +1,9 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __ARM_KVM_NESTED_H
> +#define __ARM_KVM_NESTED_H
> +
> +#include 
> +
> +static inline bool nested_virt_in_use(const struct kvm_vcpu *vcpu) { return 
> false; }
> +
> +#endif /* __ARM_KVM_NESTED_H */
> diff --git a/arch/arm64/include/asm/kvm_nested.h 
> b/arch/arm64/include/asm/kvm_nested.h
> new file mode 100644
> index ..8a3d121a0b42
> --- /dev/null
> +++ b/arch/arm64/include/asm/kvm_nested.h
> @@ -0,0 +1,13 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __ARM64_KVM_NESTED_H
> +#define __ARM64_KVM_NESTED_H
> +
> +#include 
> +
> +static inline bool nested_virt_in_use(const struct kvm_vcpu *vcpu)
> +{
> + return cpus_have_const_cap(ARM64_HAS_NESTED_VIRT) &&
> + test_bit(KVM_ARM_VCPU_NESTED_VIRT, vcpu->arch.features);

Nit: You could make it even cheaper for some systems by adding
IS_DEFINED(CONFIG_ARM64_VHE). It would also make the dependency between
NV and VHE more explicit.
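
For illustration, the nit applied (sketch only; I'm assuming IS_ENABLED(),
the standard kernel helper, is what is wanted here):

    static inline bool nested_virt_in_use(const struct kvm_vcpu *vcpu)
    {
            return IS_ENABLED(CONFIG_ARM64_VHE) &&
                   cpus_have_const_cap(ARM64_HAS_NESTED_VIRT) &&
                   test_bit(KVM_ARM_VCPU_NESTED_VIRT, vcpu->arch.features);
    }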

Otherwise:

Reviewed-by: Julien Thierry 

Cheers,

-- 
Julien Thierry


Re: [PATCH 03/59] arm64: Add ARM64_HAS_NESTED_VIRT cpufeature

2019-06-21 Thread Julien Thierry



On 21/06/2019 10:37, Marc Zyngier wrote:
> From: Jintack Lim 
> 
> Add a new ARM64_HAS_NESTED_VIRT feature to indicate that the
> CPU has the ARMv8.3 nested virtualization capability.
> 
> This will be used to support nested virtualization in KVM.
> 
> Signed-off-by: Jintack Lim 
> Signed-off-by: Andre Przywara 
> Signed-off-by: Christoffer Dall 
> Signed-off-by: Marc Zyngier 
> ---
>  .../admin-guide/kernel-parameters.txt |  4 +++
>  arch/arm64/include/asm/cpucaps.h  |  3 ++-
>  arch/arm64/include/asm/sysreg.h   |  1 +
>  arch/arm64/kernel/cpufeature.c| 26 +++
>  4 files changed, 33 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt 
> b/Documentation/admin-guide/kernel-parameters.txt
> index 138f6664b2e2..202bb2115d83 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -2046,6 +2046,10 @@
>   [KVM,ARM] Allow use of GICv4 for direct injection of
>   LPIs.
>  
> + kvm-arm.nested=
> + [KVM,ARM] Allow nested virtualization in KVM/ARM.
> + Default is 0 (disabled)
> +

Once the kernel has been built with nested guest support, what do we
gain from having it disabled by default?

It seems a bit odd since the guests have to opt in for the capability of
running guests of their own.

Is it likely to have a negative impact on the host kernel? Or on guests
that do not request use of nested virt?

If not, I feel that this kernel parameter should be dropped.

Cheers,

-- 
Julien Thierry


Re: [PATCH kvmtool 08/16] arm/pci: Do not use first PCI IO space bytes for devices

2019-06-14 Thread Julien Thierry
Hi Andre,

(sorry for the delay in reply)

On 05/04/2019 16:31, Andre Przywara wrote:
> On Thu, 7 Mar 2019 08:36:09 +
> Julien Thierry  wrote:
> 
> Hi,
> 
>> Linux has this convention that the lower 0x1000 bytes of the IO space
>> should not be used. (cf PCIBIOS_MIN_IO).
>>
>> Just allocate those bytes to prevent future allocation assigning it to
>> devices.
>>
>> Signed-off-by: Julien Thierry 
>> ---
>>  arm/pci.c | 3 +++
>>  1 file changed, 3 insertions(+)
>>
>> diff --git a/arm/pci.c b/arm/pci.c
>> index 83238ca..559e0cf 100644
>> --- a/arm/pci.c
>> +++ b/arm/pci.c
>> @@ -37,6 +37,9 @@ void pci__arm_init(struct kvm *kvm)
>>  
>>  /* Make PCI port allocation start at a properly aligned address */
>>  pci_get_io_space_block(align_pad);
>> +
>> +/* Convention, don't allocate first 0x1000 bytes of PCI IO */
>> +pci_get_io_space_block(0x1000);
> 
> Is this the same problem with mixing up I/O and MMIO space as in the other 
> patch?
> io_space means MMIO, right?
> 

Oh yes, you're right. Thanks for catching that (and in the other patch
as well).

However, fixing it unveiled a bug which apparently requires me to change
a bunch of things w.r.t. how we handle the configuration. At boot time,
Linux (without probe-only) reassigns BARs without disabling the device
response (it assumes that none of the devices it can configure are being
used/accessed).

This means that during the reassignment, BARs from different devices (or
the same device) can temporarily alias/overlap each other. And the
current handling of the PCI io/mmio regions doesn't support that.

I'll rework this to make things a little bit more flexible.

Thanks,

-- 
Julien Thierry


Re: [PATCH v9 4/5] KVM: arm/arm64: remove pmc->bitmask

2019-06-13 Thread Julien Thierry
Hi Andrew,

On 12/06/2019 20:04, Andrew Murray wrote:
> We currently use pmc->bitmask to determine the width of the pmc - however
> it's superfluous as the pmc index already describes if the pmc is a cycle
> counter or event counter. The architecture clearly describes the widths of
> these counters.
> 
> Let's remove the bitmask to simplify the code.
> 
> Signed-off-by: Andrew Murray 
> ---
>  include/kvm/arm_pmu.h |  1 -
>  virt/kvm/arm/pmu.c| 19 +--
>  2 files changed, 9 insertions(+), 11 deletions(-)
> 
> diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
> index b73f31baca52..2f0e28dc5a9e 100644
> --- a/include/kvm/arm_pmu.h
> +++ b/include/kvm/arm_pmu.h
> @@ -28,7 +28,6 @@
>  struct kvm_pmc {
>   u8 idx; /* index into the pmu->pmc array */
>   struct perf_event *perf_event;
> - u64 bitmask;
>  };
>  
>  struct kvm_pmu {
> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> index ae1e886d4a1a..88ce24ae0b45 100644
> --- a/virt/kvm/arm/pmu.c
> +++ b/virt/kvm/arm/pmu.c
> @@ -47,7 +47,10 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 
> select_idx)
>   counter += perf_event_read_value(pmc->perf_event, ,
>);
>  
> - return counter & pmc->bitmask;
> + if (select_idx != ARMV8_PMU_CYCLE_IDX)
> + counter = lower_32_bits(counter);

Shouldn't this depend on PMCR.LC as well? If PMCR.LC is clear, we only
want the lower 32 bits of the cycle counter.
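
To make that concrete, something along these lines (untested sketch,
reusing the existing ARMV8_PMU_PMCR_LC definition):

    if (select_idx != ARMV8_PMU_CYCLE_IDX ||
        !(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC))
            counter = lower_32_bits(counter);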

Cheers,

-- 
Julien Thierry


Re: [PATCH v2 1/9] KVM: arm/arm64: vgic: Add LPI translation cache definition

2019-06-12 Thread Julien Thierry



On 12/06/2019 11:58, Julien Thierry wrote:
> 
> 
> On 12/06/2019 10:52, Marc Zyngier wrote:
>> Hi Julien,
>>
>> On Wed, 12 Jun 2019 09:16:21 +0100,
>> Julien Thierry  wrote:
>>>
>>> Hi Marc,
>>>
>>> On 11/06/2019 18:03, Marc Zyngier wrote:
>>>> Add the basic data structure that expresses an MSI to LPI
>>>> translation as well as the allocation/release hooks.
>>>>
>>>> THe size of the cache is arbitrarily defined as 4*nr_vcpus.
>>>>
>>>
>>> The size has been arbitrarily changed to 16*nr_vcpus :) .
>>
>> Well spotted! ;-)
>>
>>>
>>> Nit: The*
>>
>> Ah, usual lazy finger on the Shift key... One day I'll learn to type.
>>
>>>
>>>> Signed-off-by: Marc Zyngier 
>>>> ---
>>>>  include/kvm/arm_vgic.h|  3 +++
>>>>  virt/kvm/arm/vgic/vgic-init.c |  5 
>>>>  virt/kvm/arm/vgic/vgic-its.c  | 49 +++
>>>>  virt/kvm/arm/vgic/vgic.h  |  2 ++
>>>>  4 files changed, 59 insertions(+)
>>>>
> 
> [...]
> 
>>>> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
>>>> index 44ceaccb18cf..ce9bcddeb7f1 100644
>>>> --- a/virt/kvm/arm/vgic/vgic-its.c
>>>> +++ b/virt/kvm/arm/vgic/vgic-its.c
>>>> @@ -149,6 +149,14 @@ struct its_ite {
>>>>u32 event_id;
>>>>  };
>>>>  
>>>> +struct vgic_translation_cache_entry {
>>>> +  struct list_headentry;
>>>> +  phys_addr_t db;
>>>> +  u32 devid;
>>>> +  u32 eventid;
>>>> +  struct vgic_irq *irq;
>>>> +};
>>>> +
>>>>  /**
>>>>   * struct vgic_its_abi - ITS abi ops and settings
>>>>   * @cte_esz: collection table entry size
>>>> @@ -1668,6 +1676,45 @@ static int vgic_register_its_iodev(struct kvm *kvm, 
>>>> struct vgic_its *its,
>>>>return ret;
>>>>  }
>>>>  
>>>> +/* Default is 16 cached LPIs per vcpu */
>>>> +#define LPI_DEFAULT_PCPU_CACHE_SIZE   16
>>>> +
>>>> +void vgic_lpi_translation_cache_init(struct kvm *kvm)
>>>> +{
>>>> +  struct vgic_dist *dist = >arch.vgic;
>>>> +  unsigned int sz;
>>>> +  int i;
>>>> +
>>>> +  if (!list_empty(>lpi_translation_cache))
>>>> +  return;
>>>> +
>>>> +  sz = atomic_read(>online_vcpus) * LPI_DEFAULT_PCPU_CACHE_SIZE;
>>>> +
>>>> +  for (i = 0; i < sz; i++) {
>>>> +  struct vgic_translation_cache_entry *cte;
>>>> +
>>>> +  /* An allocation failure is not fatal */
>>>> +  cte = kzalloc(sizeof(*cte), GFP_KERNEL);
>>>> +  if (WARN_ON(!cte))
>>>> +  break;
>>>> +
>>>> +  INIT_LIST_HEAD(>entry);
>>>> +  list_add(>entry, >lpi_translation_cache);
>>>
>>> Going through the series, it looks like this list is either empty
>>> (before the cache init) or has a fixed number
>>> (LPI_DEFAULT_PCPU_CACHE_SIZE * nr_cpus) of entries.
>>
>> Well, it could also fail when allocating one of the entry, meaning we
>> can have an allocation ranging from 0 to (LPI_DEFAULT_PCPU_CACHE_SIZE
>> * nr_cpus) entries.
>>
>>> And the list never grows nor shrinks throughout the series, so it
>>> seems odd to be using a list here.
>>>
>>> Is there a reason for not using a dynamically allocated array instead of
>>> the list? (does list_move() provide a big perf advantage over swapping
>>> the data from one array entry to another? Or is there some other
>>> facility I am missing?
>>
>> The idea was to make the LRU policy cheap, on the assumption that
>> list_move (which is only a couple of pointer updates) is cheaper than
>> a memmove if you want to keep the array ordered. If we exclude the
>> list head, we end-up with 24 bytes per entry to move down to make room
>> for the new entry at the head of the array. For large caches that miss
>> very often, this will hurt badly. But is that really a problem? I
>> don't know.
>>
> 
> Yes, I realized afterwards that the LRU uses the fact you can easily
> move list entries without modifying the rest of the list.
> 
>> We could allocate an array as y

Re: [PATCH v2 1/9] KVM: arm/arm64: vgic: Add LPI translation cache definition

2019-06-12 Thread Julien Thierry



On 12/06/2019 10:52, Marc Zyngier wrote:
> Hi Julien,
> 
> On Wed, 12 Jun 2019 09:16:21 +0100,
> Julien Thierry  wrote:
>>
>> Hi Marc,
>>
>> On 11/06/2019 18:03, Marc Zyngier wrote:
>>> Add the basic data structure that expresses an MSI to LPI
>>> translation as well as the allocation/release hooks.
>>>
>>> THe size of the cache is arbitrarily defined as 4*nr_vcpus.
>>>
>>
>> The size has been arbitrarily changed to 16*nr_vcpus :) .
> 
> Well spotted! ;-)
> 
>>
>> Nit: The*
> 
> Ah, usual lazy finger on the Shift key... One day I'll learn to type.
> 
>>
>>> Signed-off-by: Marc Zyngier 
>>> ---
>>>  include/kvm/arm_vgic.h|  3 +++
>>>  virt/kvm/arm/vgic/vgic-init.c |  5 
>>>  virt/kvm/arm/vgic/vgic-its.c  | 49 +++
>>>  virt/kvm/arm/vgic/vgic.h  |  2 ++
>>>  4 files changed, 59 insertions(+)
>>>

[...]

>>> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
>>> index 44ceaccb18cf..ce9bcddeb7f1 100644
>>> --- a/virt/kvm/arm/vgic/vgic-its.c
>>> +++ b/virt/kvm/arm/vgic/vgic-its.c
>>> @@ -149,6 +149,14 @@ struct its_ite {
>>> u32 event_id;
>>>  };
>>>  
>>> +struct vgic_translation_cache_entry {
>>> +   struct list_headentry;
>>> +   phys_addr_t db;
>>> +   u32 devid;
>>> +   u32 eventid;
>>> +   struct vgic_irq *irq;
>>> +};
>>> +
>>>  /**
>>>   * struct vgic_its_abi - ITS abi ops and settings
>>>   * @cte_esz: collection table entry size
>>> @@ -1668,6 +1676,45 @@ static int vgic_register_its_iodev(struct kvm *kvm, 
>>> struct vgic_its *its,
>>> return ret;
>>>  }
>>>  
>>> +/* Default is 16 cached LPIs per vcpu */
>>> +#define LPI_DEFAULT_PCPU_CACHE_SIZE16
>>> +
>>> +void vgic_lpi_translation_cache_init(struct kvm *kvm)
>>> +{
>>> +   struct vgic_dist *dist = >arch.vgic;
>>> +   unsigned int sz;
>>> +   int i;
>>> +
>>> +   if (!list_empty(>lpi_translation_cache))
>>> +   return;
>>> +
>>> +   sz = atomic_read(>online_vcpus) * LPI_DEFAULT_PCPU_CACHE_SIZE;
>>> +
>>> +   for (i = 0; i < sz; i++) {
>>> +   struct vgic_translation_cache_entry *cte;
>>> +
>>> +   /* An allocation failure is not fatal */
>>> +   cte = kzalloc(sizeof(*cte), GFP_KERNEL);
>>> +   if (WARN_ON(!cte))
>>> +   break;
>>> +
>>> +   INIT_LIST_HEAD(>entry);
>>> +   list_add(>entry, >lpi_translation_cache);
>>
>> Going through the series, it looks like this list is either empty
>> (before the cache init) or has a fixed number
>> (LPI_DEFAULT_PCPU_CACHE_SIZE * nr_cpus) of entries.
> 
> Well, it could also fail when allocating one of the entry, meaning we
> can have an allocation ranging from 0 to (LPI_DEFAULT_PCPU_CACHE_SIZE
> * nr_cpus) entries.
> 
>> And the list never grows nor shrinks throughout the series, so it
>> seems odd to be using a list here.
>>
>> Is there a reason for not using a dynamically allocated array instead of
>> the list? (does list_move() provide a big perf advantage over swapping
>> the data from one array entry to another? Or is there some other
>> facility I am missing?
> 
> The idea was to make the LRU policy cheap, on the assumption that
> list_move (which is only a couple of pointer updates) is cheaper than
> a memmove if you want to keep the array ordered. If we exclude the
> list head, we end-up with 24 bytes per entry to move down to make room
> for the new entry at the head of the array. For large caches that miss
> very often, this will hurt badly. But is that really a problem? I
> don't know.
> 

Yes, I realized afterwards that the LRU uses the fact you can easily
move list entries without modifying the rest of the list.

> We could allocate an array as you suggest, and use a linked list
> inside the array. Or something else. I'm definitely open to
> suggestion!

If there turns out to be some benefit to just using a fixed array, we
could use a simple ring buffer. Have one pointer to the most recently
inserted entry (and we know the next insertion will take place on the
entry "just before" it) and one pointer to the least recently used entry
(which gets moved when the most recently inserted
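
A minimal sketch of that ring-buffer idea (illustrative only; the
structure and function names are made up, reusing the cache entry type
from the patch):

    struct lpi_xlat_cache {
            struct vgic_translation_cache_entry *slots; /* fixed-size array */
            unsigned int nr_slots;
            unsigned int mru;       /* most recently inserted slot */
    };

    /*
     * Insert by overwriting the slot "just before" the current MRU;
     * with insert-only updates that slot is the least recently used.
     */
    static void lpi_cache_insert(struct lpi_xlat_cache *cache,
                                 const struct vgic_translation_cache_entry *new)
    {
            cache->mru = (cache->mru + cache->nr_slots - 1) % cache->nr_slots;
            cache->slots[cache->mru] = *new;
    }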

Re: [PATCH v2 1/9] KVM: arm/arm64: vgic: Add LPI translation cache definition

2019-06-12 Thread Julien Thierry



On 12/06/2019 09:16, Julien Thierry wrote:
> Hi Marc,
> 
> On 11/06/2019 18:03, Marc Zyngier wrote:

[...]

>> +
>> +void vgic_lpi_translation_cache_init(struct kvm *kvm)
>> +{
>> +struct vgic_dist *dist = >arch.vgic;
>> +unsigned int sz;
>> +int i;
>> +
>> +if (!list_empty(>lpi_translation_cache))
>> +return;
>> +
>> +sz = atomic_read(>online_vcpus) * LPI_DEFAULT_PCPU_CACHE_SIZE;
>> +
>> +for (i = 0; i < sz; i++) {
>> +struct vgic_translation_cache_entry *cte;
>> +
>> +/* An allocation failure is not fatal */
>> +cte = kzalloc(sizeof(*cte), GFP_KERNEL);
>> +if (WARN_ON(!cte))
>> +break;
>> +
>> +INIT_LIST_HEAD(>entry);
>> +list_add(>entry, >lpi_translation_cache);
> 
> Going through the series, it looks like this list is either empty
> (before the cache init) or has a fixed number
> (LPI_DEFAULT_PCPU_CACHE_SIZE * nr_cpus) of entries. And the list never
> grows nor shrinks throughout the series, so it seems odd to be using a
> list here.
> 
> Is there a reason for not using a dynamically allocated array instead of
> the list? (does list_move() provide a big perf advantage over swapping
> the data from one array entry to another? Or is there some other
> facility I am missing?
> 

Scratch that, I realized having the list makes it easier to implement
the LRU policy later in the series.

-- 
Julien Thierry


Re: [PATCH v2 1/9] KVM: arm/arm64: vgic: Add LPI translation cache definition

2019-06-12 Thread Julien Thierry
Hi Marc,

On 11/06/2019 18:03, Marc Zyngier wrote:
> Add the basic data structure that expresses an MSI to LPI
> translation as well as the allocation/release hooks.
> 
> THe size of the cache is arbitrarily defined as 4*nr_vcpus.
>

The size has been arbitrarily changed to 16*nr_vcpus :) .

Nit: The*

> Signed-off-by: Marc Zyngier 
> ---
>  include/kvm/arm_vgic.h|  3 +++
>  virt/kvm/arm/vgic/vgic-init.c |  5 
>  virt/kvm/arm/vgic/vgic-its.c  | 49 +++
>  virt/kvm/arm/vgic/vgic.h  |  2 ++
>  4 files changed, 59 insertions(+)
> 
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index c36c86f1ec9a..ca7bcf52dc85 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -260,6 +260,9 @@ struct vgic_dist {
>   struct list_headlpi_list_head;
>   int lpi_list_count;
>  
> + /* LPI translation cache */
> + struct list_headlpi_translation_cache;
> +
>   /* used by vgic-debug */
>   struct vgic_state_iter *iter;
>  
> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
> index 3bdb31eaed64..c7c4c77dd430 100644
> --- a/virt/kvm/arm/vgic/vgic-init.c
> +++ b/virt/kvm/arm/vgic/vgic-init.c
> @@ -64,6 +64,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
>   struct vgic_dist *dist = >arch.vgic;
>  
>   INIT_LIST_HEAD(>lpi_list_head);
> + INIT_LIST_HEAD(>lpi_translation_cache);
>   raw_spin_lock_init(>lpi_list_lock);
>  }
>  
> @@ -305,6 +306,7 @@ int vgic_init(struct kvm *kvm)
>   }
>  
>   if (vgic_has_its(kvm)) {
> + vgic_lpi_translation_cache_init(kvm);
>   ret = vgic_v4_init(kvm);
>   if (ret)
>   goto out;
> @@ -346,6 +348,9 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
>   INIT_LIST_HEAD(>rd_regions);
>   }
>  
> + if (vgic_has_its(kvm))
> + vgic_lpi_translation_cache_destroy(kvm);
> +
>   if (vgic_supports_direct_msis(kvm))
>   vgic_v4_teardown(kvm);
>  }
> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
> index 44ceaccb18cf..ce9bcddeb7f1 100644
> --- a/virt/kvm/arm/vgic/vgic-its.c
> +++ b/virt/kvm/arm/vgic/vgic-its.c
> @@ -149,6 +149,14 @@ struct its_ite {
>   u32 event_id;
>  };
>  
> +struct vgic_translation_cache_entry {
> + struct list_headentry;
> + phys_addr_t db;
> + u32 devid;
> + u32 eventid;
> + struct vgic_irq *irq;
> +};
> +
>  /**
>   * struct vgic_its_abi - ITS abi ops and settings
>   * @cte_esz: collection table entry size
> @@ -1668,6 +1676,45 @@ static int vgic_register_its_iodev(struct kvm *kvm, 
> struct vgic_its *its,
>   return ret;
>  }
>  
> +/* Default is 16 cached LPIs per vcpu */
> +#define LPI_DEFAULT_PCPU_CACHE_SIZE  16
> +
> +void vgic_lpi_translation_cache_init(struct kvm *kvm)
> +{
> + struct vgic_dist *dist = >arch.vgic;
> + unsigned int sz;
> + int i;
> +
> + if (!list_empty(>lpi_translation_cache))
> + return;
> +
> + sz = atomic_read(>online_vcpus) * LPI_DEFAULT_PCPU_CACHE_SIZE;
> +
> + for (i = 0; i < sz; i++) {
> + struct vgic_translation_cache_entry *cte;
> +
> + /* An allocation failure is not fatal */
> + cte = kzalloc(sizeof(*cte), GFP_KERNEL);
> + if (WARN_ON(!cte))
> + break;
> +
> + INIT_LIST_HEAD(>entry);
> + list_add(>entry, >lpi_translation_cache);

Going through the series, it looks like this list is either empty
(before the cache init) or has a fixed number
(LPI_DEFAULT_PCPU_CACHE_SIZE * nr_cpus) of entries. And the list never
grows nor shrinks throughout the series, so it seems odd to be using a
list here.

Is there a reason for not using a dynamically allocated array instead of
the list? (does list_move() provide a big perf advantage over swapping
the data from one array entry to another? Or is there some other
facility I am missing?

Cheers,

-- 
Julien Thierry


Re: [PATCH v8 6/6] KVM: arm/arm64: support chained PMU counters

2019-06-10 Thread Julien Thierry
Hi Andrew,

On 22/05/2019 16:30, Andrew Murray wrote:
> ARMv8 provides support for chained PMU counters, where an event type
> of 0x001E is set for odd-numbered counters, the event counter will
> increment by one for each overflow of the preceding even-numbered
> counter. Let's emulate this in KVM by creating a 64 bit perf counter
> when a user chains two emulated counters together.
> 
> For chained events we only support generating an overflow interrupt
> on the high counter. We use the attributes of the low counter to
> determine the attributes of the perf event.
> 

I think this looks good now. Once the previous patch is fixed you can add:

Reviewed-by: Julien Thierry 

Cheers,

Julien

> Suggested-by: Marc Zyngier 
> Signed-off-by: Andrew Murray 
> ---
>  include/kvm/arm_pmu.h |   2 +
>  virt/kvm/arm/pmu.c| 248 --
>  2 files changed, 215 insertions(+), 35 deletions(-)
> 
> diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
> index 2f0e28dc5a9e..589f49ed8cf8 100644
> --- a/include/kvm/arm_pmu.h
> +++ b/include/kvm/arm_pmu.h
> @@ -22,6 +22,7 @@
>  #include 
>  
>  #define ARMV8_PMU_CYCLE_IDX  (ARMV8_PMU_MAX_COUNTERS - 1)
> +#define ARMV8_PMU_MAX_COUNTER_PAIRS  ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1)
>  
>  #ifdef CONFIG_KVM_ARM_PMU
>  
> @@ -33,6 +34,7 @@ struct kvm_pmc {
>  struct kvm_pmu {
>   int irq_num;
>   struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS];
> + DECLARE_BITMAP(chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
>   bool ready;
>   bool created;
>   bool irq_level;
> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> index c4e2bc213617..796bcf76911f 100644
> --- a/virt/kvm/arm/pmu.c
> +++ b/virt/kvm/arm/pmu.c
> @@ -25,29 +25,129 @@
>  #include 
>  
>  static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
> +
> +#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
> +
> +static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
> +{
> + struct kvm_pmu *pmu;
> + struct kvm_vcpu_arch *vcpu_arch;
> +
> + pmc -= pmc->idx;
> + pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
> + vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
> + return container_of(vcpu_arch, struct kvm_vcpu, arch);
> +}
> +
>  /**
> - * kvm_pmu_get_counter_value - get PMU counter value
> + * kvm_pmu_pmc_is_chained - determine if the pmc is chained
> + * @pmc: The PMU counter pointer
> + */
> +static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
> +{
> + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
> +
> + return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
> +}
> +
> +/**
> + * kvm_pmu_pmc_is_high_counter - determine if select_idx is a high/low 
> counter
> + * @select_idx: The counter index
> + */
> +static bool kvm_pmu_pmc_is_high_counter(u64 select_idx)
> +{
> + return select_idx & 0x1;
> +}
> +
> +/**
> + * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
> + * @pmc: The PMU counter pointer
> + *
> + * When a pair of PMCs are chained together we use the low counter 
> (canonical)
> + * to hold the underlying perf event.
> + */
> +static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
> +{
> + if (kvm_pmu_pmc_is_chained(pmc) &&
> + kvm_pmu_pmc_is_high_counter(pmc->idx))
> + return pmc - 1;
> +
> + return pmc;
> +}
> +
> +/**
> + * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
>   * @vcpu: The vcpu pointer
>   * @select_idx: The counter index
>   */
> -u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
> +static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 
> select_idx)
>  {
> - u64 counter, reg, enabled, running;
> - struct kvm_pmu *pmu = >arch.pmu;
> - struct kvm_pmc *pmc = >pmc[select_idx];
> + u64 eventsel, reg;
>  
> - reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
> -   ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
> - counter = __vcpu_sys_reg(vcpu, reg);
> + select_idx |= 0x1;
> +
> + if (select_idx == ARMV8_PMU_CYCLE_IDX)
> + return false;
>  
> - /* The real counter value is equal to the value of counter register plus
> + reg = PMEVTYPER0_EL0 + select_idx;
> + eventsel = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_EVENT;
> +
> + return armv8pmu_evtype_is_chain(eventsel);
> +}
> +
> +/**
> + * kvm_pmu_get_pair_counter_value - get PMU counter value
> + * @vcpu: The vcpu pointer
> + * @pmc: The PMU counter pointer
> + */
> +static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu

Re: [PATCH 3/8] KVM: arm/arm64: vgic-its: Cache successful MSI->LPI translation

2019-06-07 Thread Julien Thierry



On 07/06/2019 09:51, Marc Zyngier wrote:
> On 07/06/2019 09:35, Julien Thierry wrote:
>> Hi Marc,
>>
>> On 06/06/2019 17:54, Marc Zyngier wrote:
>>> On a successful translation, preserve the parameters in the LPI
>>> translation cache. Each translation is reusing the last slot
>>> in the list, naturally evicting the least recently used entry.
>>>
>>> Signed-off-by: Marc Zyngier 
>>> ---
>>>  virt/kvm/arm/vgic/vgic-its.c | 41 
>>>  1 file changed, 41 insertions(+)
>>>
>>> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
>>> index 5758504fd934..bc370b6c5afa 100644
>>> --- a/virt/kvm/arm/vgic/vgic-its.c
>>> +++ b/virt/kvm/arm/vgic/vgic-its.c
>>> @@ -538,6 +538,45 @@ static unsigned long vgic_mmio_read_its_idregs(struct 
>>> kvm *kvm,
>>> return 0;
>>>  }
>>>  
>>> +static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its 
>>> *its,
>>> +  u32 devid, u32 eventid,
>>> +  struct vgic_irq *irq)
>>> +{
>>> +   struct vgic_dist *dist = >arch.vgic;
>>> +   struct vgic_translation_cache_entry *cte;
>>> +   unsigned long flags;
>>> +
>>> +   /* Do not cache a directly injected interrupt */
>>> +   if (irq->hw)
>>> +   return;
>>> +
>>> +   raw_spin_lock_irqsave(>lpi_list_lock, flags);
>>> +
>>> +   /* Always reuse the last entry (LRU policy) */
>>> +   cte = list_last_entry(>lpi_translation_cache,
>>> + typeof(*cte), entry);
>>> +
>>> +   /*
>>> +* Caching the translation implies having an extra reference
>>> +* to the interrupt, so drop the potential reference on what
>>> +* was in the cache, and increment it on the new interrupt.
>>> +*/
>>> +   if (cte->irq)
>>> +   __vgic_put_lpi_locked(kvm, cte->irq);
>>> +
>>> +   vgic_get_irq_kref(irq);
>>
>> If cte->irq == irq, can we avoid the ref putting and getting and just
>> move the list entry (and update cte)?
> But in that case, we should have hit in the cache the first place, no?
> Or is there a particular race I'm not thinking of just yet?
> 

Yes, I had not made it far enough in the series to see the cache hits
and assumed this function would also be used to update the LRU policy.

You can dismiss this comment, sorry for the noise.

Cheers,

-- 
Julien Thierry


Re: [PATCH 3/8] KVM: arm/arm64: vgic-its: Cache successful MSI->LPI translation

2019-06-07 Thread Julien Thierry
Hi Marc,

On 06/06/2019 17:54, Marc Zyngier wrote:
> On a successful translation, preserve the parameters in the LPI
> translation cache. Each translation is reusing the last slot
> in the list, naturally evicting the least recently used entry.
> 
> Signed-off-by: Marc Zyngier 
> ---
>  virt/kvm/arm/vgic/vgic-its.c | 41 
>  1 file changed, 41 insertions(+)
> 
> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
> index 5758504fd934..bc370b6c5afa 100644
> --- a/virt/kvm/arm/vgic/vgic-its.c
> +++ b/virt/kvm/arm/vgic/vgic-its.c
> @@ -538,6 +538,45 @@ static unsigned long vgic_mmio_read_its_idregs(struct 
> kvm *kvm,
>   return 0;
>  }
>  
> +static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
> +u32 devid, u32 eventid,
> +struct vgic_irq *irq)
> +{
> + struct vgic_dist *dist = >arch.vgic;
> + struct vgic_translation_cache_entry *cte;
> + unsigned long flags;
> +
> + /* Do not cache a directly injected interrupt */
> + if (irq->hw)
> + return;
> +
> + raw_spin_lock_irqsave(>lpi_list_lock, flags);
> +
> + /* Always reuse the last entry (LRU policy) */
> + cte = list_last_entry(>lpi_translation_cache,
> +   typeof(*cte), entry);
> +
> + /*
> +  * Caching the translation implies having an extra reference
> +  * to the interrupt, so drop the potential reference on what
> +  * was in the cache, and increment it on the new interrupt.
> +  */
> + if (cte->irq)
> + __vgic_put_lpi_locked(kvm, cte->irq);
> +
> + vgic_get_irq_kref(irq);

If cte->irq == irq, can we avoid the ref putting and getting and just
move the list entry (and update cte)?
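
For concreteness, the kind of early-out I have in mind (hypothetical
sketch; 'out' is assumed to be a label that drops the lpi_list_lock):

    /* Same translation already cached: no need to touch the refcounts */
    if (cte->irq == irq)
            goto out;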

Cheers,

-- 
Julien Thierry


Re: [PATCH 1/8] KVM: arm/arm64: vgic: Add LPI translation cache definition

2019-06-07 Thread Julien Thierry
>  
>   mutex_init(>its_lock);
> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
> index abeeffabc456..a58e1b263dca 100644
> --- a/virt/kvm/arm/vgic/vgic.h
> +++ b/virt/kvm/arm/vgic/vgic.h
> @@ -316,6 +316,9 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu 
> *vcpu, u32 **intid_ptr);
>  int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
>u32 devid, u32 eventid, struct vgic_irq **irq);
>  struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
> +void vgic_lpi_translation_cache_init(struct kvm *kvm);
> +
> +#define LPI_CACHE_SIZE(kvm)  (atomic_read(&(kvm)->online_vcpus) * 4)
>  
>  bool vgic_supports_direct_msis(struct kvm *kvm);
>  int vgic_v4_init(struct kvm *kvm);
> 

Cheers,

-- 
Julien Thierry


Re: [PATCH v1 2/6] KVM: arm64: Consume pending SError as early as possible

2019-06-05 Thread Julien Thierry
Hi James,

On 04/06/2019 15:45, James Morse wrote:
> On systems with v8.2 we switch the 'vaxorcism' of guest SError with an
> alternative sequence that uses the ESB-instruction, then reads DISR_EL1.
> This saves the unmasking and re-masking of asynchronous exceptions.
> 
> We do this after we've saved the guest registers and restored the
> host's. Any SError that becomes pending due to this will be accounted
> to the guest, when it actually occurred during host-execution.
> 
> Move the ESB-instruction as early as possible. Any guest SError
> will become pending due to this ESB-instruction and then consumed to
> DISR_EL1 before the host touches anything.
> 

Since you're moving the ESB from a HAS_RAS alternative location to a
normal location, it might be worth noting in the commit message that the
ESB is a NOP when RAS is not implemented, to clarify that we are not
uselessly adding a barrier (or potentially undefined instruction).

> This lets us account for host/guest SError precisely on the guest
> exit exception boundary.
> 
> Signed-off-by: James Morse 
> ---
> N.B. ESB-instruction is a nop on CPUs that don't support it.
> 
>  arch/arm64/include/asm/kvm_asm.h | 2 +-
>  arch/arm64/kvm/hyp/entry.S   | 5 ++---
>  arch/arm64/kvm/hyp/hyp-entry.S   | 2 ++
>  3 files changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_asm.h 
> b/arch/arm64/include/asm/kvm_asm.h
> index 9170c43b332f..5c9548ae8fa7 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -45,7 +45,7 @@
>   * Size of the HYP vectors preamble. kvm_patch_vector_branch() generates code
>   * that jumps over this.
>   */
> -#define KVM_VECTOR_PREAMBLE  4
> +#define KVM_VECTOR_PREAMBLE  8
>  
>  #ifndef __ASSEMBLY__
>  
> diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
> index 93ba3d7ef027..7863ec5266e2 100644
> --- a/arch/arm64/kvm/hyp/entry.S
> +++ b/arch/arm64/kvm/hyp/entry.S
> @@ -138,8 +138,8 @@ ENTRY(__guest_exit)
>  
>  alternative_if ARM64_HAS_RAS_EXTN
>   // If we have the RAS extensions we can consume a pending error
> - // without an unmask-SError and isb.
> - esb
> + // without an unmask-SError and isb. The ESB-instruction consumed any
> + // pending guest error when we took the exception from the guest.
>   mrs_s   x2, SYS_DISR_EL1
>   str x2, [x1, #(VCPU_FAULT_DISR - VCPU_CONTEXT)]
>   cbz x2, 1f
> @@ -157,7 +157,6 @@ alternative_else
>   mov x5, x0
>  
>   dsb sy  // Synchronize against in-flight ld/st
> - nop
>   msr daifclr, #4 // Unmask aborts
>  alternative_endif
>  
> diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
> index 914036e6b6d7..b8d37a987b34 100644
> --- a/arch/arm64/kvm/hyp/hyp-entry.S
> +++ b/arch/arm64/kvm/hyp/hyp-entry.S
> @@ -230,6 +230,7 @@ ENDPROC(\label)
>  .macro valid_vect target
>   .align 7
>  661:
> + esb

Having said the above, if the kernel is built without RAS support (you
have to disable some of the options enabled by default to get to that) but
runs on a CPU that does have the RAS extension, should we execute a nop
instead of an esb (so have an alternative here)?

Also, when we have the smccc workaround installed we end up executing two
esb instructions, is that intentional/necessary?

Could we have only one esb at the start of hyp_ventry (and "only" 26
nops after it) for KVM_INDIRECT_VECTORS? Or does this not affect
performance enough to be of interest?

>   stp x0, x1, [sp, #-16]!
>  662:
>   b   \target
> @@ -320,6 +321,7 @@ ENTRY(__bp_harden_hyp_vecs_end)
>   .popsection
>  
>  ENTRY(__smccc_workaround_1_smc_start)
> + esb
>   sub sp, sp, #(8 * 4)
>   stp x2, x3, [sp, #(8 * 0)]
>   stp x0, x1, [sp, #(8 * 2)]
> 

Thanks,

-- 
Julien Thierry


Re: [PATCH v1 1/6] KVM: arm64: Abstract the size of the HYP vectors pre-amble

2019-06-05 Thread Julien Thierry
Hi James,

On 04/06/2019 15:45, James Morse wrote:
> The EL2 vector hardening feature causes KVM to generate vectors for
> each type of CPU present in the system. The generated sequences already
> do some of the early guest-exit work (i.e. saving registers). To avoid
> duplication the generated vectors branch to the original vector just
> after the preamble. This size is hard coded.
> 
> Adding new instructions to the HYP vector causes strange side effects,
> which are difficult to debug as the affected code is patched in at
> runtime.
> 
> Add KVM_VECTOR_PREAMBLE to tell kvm_patch_vector_branch() how big
> the preamble is. The valid_vect macro can then validate this at
> build time.
> 
> Signed-off-by: James Morse 
> ---
>  arch/arm64/include/asm/kvm_asm.h |  6 ++
>  arch/arm64/kvm/hyp/hyp-entry.S   | 10 +-
>  arch/arm64/kvm/va_layout.c   |  7 +++
>  3 files changed, 18 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_asm.h 
> b/arch/arm64/include/asm/kvm_asm.h
> index ff73f5462aca..9170c43b332f 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -41,6 +41,12 @@
>   {ARM_EXCEPTION_TRAP,"TRAP"  },  \
>   {ARM_EXCEPTION_HYP_GONE,"HYP_GONE"  }
>  
> +/*
> + * Size of the HYP vectors preamble. kvm_patch_vector_branch() generates code
> + * that jumps over this.
> + */
> +#define KVM_VECTOR_PREAMBLE  4

Nit: I would use AARCH64_INSN_SIZE instead of 4 for the value if
possible. It makes it clear what the value of the vector preamble
represents (and if we add instructions we just multiply).
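
For illustration, the nit applied would be something like (sketch only):

    /* Size of the HYP vectors preamble: currently one instruction. */
    #define KVM_VECTOR_PREAMBLE     (1 * AARCH64_INSN_SIZE)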

Otherwise the patch seems a good improvement.

Reviewed-by: Julien Thierry 

Thanks,

-- 
Julien Thierry


Re: [PATCH v2 09/15] arm64: KVM: add support to save/restore SPE profiling buffer controls

2019-05-29 Thread Julien Thierry
Hi Sudeep,

On 05/23/2019 11:34 AM, Sudeep Holla wrote:
> Currently since we don't support profiling using SPE in the guests,
> we just save the PMSCR_EL1, flush the profiling buffers and disable
> sampling. However in order to support simultaneous sampling both in
> the host and guests, we need to save and reatore the complete SPE
> profiling buffer controls' context.
> 
> Let's add the support for the same and keep it disabled for now.
> We can enable it conditionally only if guests are allowed to use
> SPE.
> 
> Signed-off-by: Sudeep Holla 
> ---
>  arch/arm64/kvm/hyp/debug-sr.c | 44 ---
>  1 file changed, 35 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
> index a2714a5eb3e9..a4e6eaf5934f 100644
> --- a/arch/arm64/kvm/hyp/debug-sr.c
> +++ b/arch/arm64/kvm/hyp/debug-sr.c
> @@ -66,7 +66,8 @@
>   default:write_debug(ptr[0], reg, 0);\
>   }
>  
> -static void __hyp_text __debug_save_spe_nvhe(struct kvm_cpu_context *ctxt)
> +static void __hyp_text
> +__debug_save_spe_nvhe(struct kvm_cpu_context *ctxt, bool full_ctxt)

Rather than add a boolean to just indicate "do more stuff", I'd suggest
having two separate functions.

Also, this would be an opportunity to fix the naming of this function,
which doesn't just save the SPE context: it also flushes the context and
disables it.

So maybe have a: void __debug_spe_flush_ctx(struct kvm_cpu_context *ctx);
(perhaps with a name making it clear that it also saves PMSCR)

and: void __debug_spe_save_ctx(struct kvm_cpu_context *ctx);
which would save the registers you currently save under the full_ctxt
condition.
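
A hedged sketch of the suggested split (the function names are only a
suggestion and the bodies are placeholders, not working code):

    /* Save PMSCR_EL1, then drain and disable profiling (current behaviour). */
    static void __hyp_text __debug_spe_flush_ctx(struct kvm_cpu_context *ctxt)
    {
            /* ... */
    }

    /* Save the remaining SPE buffer control registers for a full save. */
    static void __hyp_text __debug_spe_save_ctx(struct kvm_cpu_context *ctxt)
    {
            /* ... */
    }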

Cheers,

Julien


Re: [PATCH v2 07/15] arm64: KVM: split debug save restore across vm/traps activation

2019-05-28 Thread Julien Thierry
Hi Sudeep,

On 23/05/2019 11:34, Sudeep Holla wrote:
> If we enable profiling buffer controls at EL1 generate a trap exception
> to EL2, it also changes profiling buffer to use EL1&0 stage 1 translation
> regime in case of VHE. To support SPE both in the guest and host, we
> need to first stop profiling and flush the profiling buffers before
> we activate/switch vm or enable/disable the traps.
> 
> In preparation to do that, let's split the debug save/restore functionality
> into 4 steps:
> 1. debug_save_host_context - saves the host context
> 2. debug_restore_guest_context - restore the guest context
> 3. debug_save_guest_context - saves the guest context
> 4. debug_restore_host_context - restores the host context
> 
> Lets rename existing __debug_switch_to_{host,guest} to make sure it's
> aligned to the above and just add the place holders for new ones getting
> added here as we need them to support SPE in guests.
> 
> Signed-off-by: Sudeep Holla 
> ---
>  arch/arm64/include/asm/kvm_hyp.h |  6 --
>  arch/arm64/kvm/hyp/debug-sr.c| 25 -
>  arch/arm64/kvm/hyp/switch.c  | 12 
>  3 files changed, 28 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_hyp.h 
> b/arch/arm64/include/asm/kvm_hyp.h
> index 782955db61dd..1c5ed80fcbda 100644
> --- a/arch/arm64/include/asm/kvm_hyp.h
> +++ b/arch/arm64/include/asm/kvm_hyp.h
> @@ -164,8 +164,10 @@ void sysreg_restore_guest_state_vhe(struct 
> kvm_cpu_context *ctxt);
>  void __sysreg32_save_state(struct kvm_vcpu *vcpu);
>  void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
>  
> -void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
> -void __debug_switch_to_host(struct kvm_vcpu *vcpu);
> +void __debug_save_host_context(struct kvm_vcpu *vcpu);
> +void __debug_restore_guest_context(struct kvm_vcpu *vcpu);
> +void __debug_save_guest_context(struct kvm_vcpu *vcpu);
> +void __debug_restore_host_context(struct kvm_vcpu *vcpu);
>  
>  void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
>  void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
> diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
> index fa51236ebcb3..618884df1dc4 100644
> --- a/arch/arm64/kvm/hyp/debug-sr.c
> +++ b/arch/arm64/kvm/hyp/debug-sr.c
> @@ -149,20 +149,13 @@ static void __hyp_text __debug_restore_state(struct 
> kvm_vcpu *vcpu,
>   write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1);
>  }
>  
> -void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu)
> +void __hyp_text __debug_restore_guest_context(struct kvm_vcpu *vcpu)
>  {
>   struct kvm_cpu_context *host_ctxt;
>   struct kvm_cpu_context *guest_ctxt;
>   struct kvm_guest_debug_arch *host_dbg;
>   struct kvm_guest_debug_arch *guest_dbg;
>  
> - /*
> -  * Non-VHE: Disable and flush SPE data generation
> -  * VHE: The vcpu can run, but it can't hide.
> -  */
> - if (!has_vhe())
> - __debug_save_spe_nvhe(>arch.host_debug_state.pmscr_el1);
> -
>   if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
>   return;
>  
> @@ -175,7 +168,7 @@ void __hyp_text __debug_switch_to_guest(struct kvm_vcpu 
> *vcpu)
>   __debug_restore_state(vcpu, guest_dbg, guest_ctxt);
>  }
>  
> -void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu)
> +void __hyp_text __debug_restore_host_context(struct kvm_vcpu *vcpu)

In the current state of the sources, __debug_switch_to_host() seems to
save the guest debug state before restoring the host's:

__debug_save_state(vcpu, guest_dbg, guest_ctxt);

Since you're splitting the switch_to into save/restore operations, it
feels like this would fit better in __debug_save_guest_context() (currently
empty) rather than in __debug_restore_host_context().
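
i.e., roughly (sketch based on the hunks above, not a tested change):

    void __hyp_text __debug_save_guest_context(struct kvm_vcpu *vcpu)
    {
            /* ... look up guest_ctxt/guest_dbg as in the restore path ... */
            __debug_save_state(vcpu, guest_dbg, guest_ctxt);
    }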

Cheers,

-- 
Julien Thierry


Re: [PATCH v7 5/5] KVM: arm/arm64: support chained PMU counters

2019-05-21 Thread Julien Thierry
> + pmc = kvm_pmu_get_canonical_pmc(pmc);
> + reg = PMEVCNTR0_EL0 + pmc->idx;
> +
> + counter = __vcpu_sys_reg(vcpu, reg);
> + counter_high = __vcpu_sys_reg(vcpu, reg + 1);
> +
> + counter = lower_32_bits(counter) | (counter_high << 32);
> + } else {
> + reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
> +   ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
> + counter = __vcpu_sys_reg(vcpu, reg);
> + }
>  
> - /* The real counter value is equal to the value of counter register plus
> + /*
> +  * The real counter value is equal to the value of counter register plus
>* the value perf event counts.
>*/
>   if (pmc->perf_event)
>   counter += perf_event_read_value(pmc->perf_event, ,
>);
>  
> + return counter;
> +}
> +
> +/**
> + * kvm_pmu_get_counter_value - get PMU counter value
> + * @vcpu: The vcpu pointer
> + * @select_idx: The counter index
> + */
> +u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
> +{
> + u64 counter;
> + struct kvm_pmu *pmu = >arch.pmu;
> + struct kvm_pmc *pmc = >pmc[select_idx];
> +
> + counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
> +
> + if (kvm_pmu_pmc_is_chained(pmc) &&
> + kvm_pmu_pmc_is_high_counter(select_idx))
> + counter >>= 32;
> +
>   return counter & pmc->bitmask;
>  }
>  
> @@ -74,6 +174,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 
> select_idx, u64 val)
>   */
>  static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
>  {
> + pmc = kvm_pmu_get_canonical_pmc(pmc);
>   if (pmc->perf_event) {
>   perf_event_disable(pmc->perf_event);
>   perf_event_release_kernel(pmc->perf_event);
> @@ -91,13 +192,24 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, 
> struct kvm_pmc *pmc)
>  {
>   u64 counter, reg;
>  
> - if (pmc->perf_event) {
> + pmc = kvm_pmu_get_canonical_pmc(pmc);
> + if (!pmc->perf_event)
> + return;
> +
> + if (kvm_pmu_pmc_is_chained(pmc)) {
> + counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
> +
> + reg = PMEVCNTR0_EL0 + pmc->idx;
> + __vcpu_sys_reg(vcpu, reg) = counter & pmc->bitmask;
> + __vcpu_sys_reg(vcpu, reg + 1) = (counter >> 32) & pmc->bitmask;
> + } else {
>   counter = kvm_pmu_get_counter_value(vcpu, pmc->idx);
>   reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
>  ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
>   __vcpu_sys_reg(vcpu, reg) = counter;
> - kvm_pmu_release_perf_event(pmc);
>   }
> +
> + kvm_pmu_release_perf_event(pmc);
>  }
>  
>  /**
> @@ -115,6 +227,8 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
>   pmu->pmc[i].idx = i;
>   pmu->pmc[i].bitmask = 0xUL;
>   }
> +
> + bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
>  }
>  
>  /**
> @@ -154,6 +268,7 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, 
> u64 val)
>   int i;
>   struct kvm_pmu *pmu = >arch.pmu;
>   struct kvm_pmc *pmc;
> + struct perf_event *perf_event;
>  
>   if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
>   return;
> @@ -163,9 +278,21 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, 
> u64 val)
>   continue;
>  
>   pmc = >pmc[i];
> +
> + /*
> +  * For high counters of chained events we must recreate the
> +  * perf event with the long (64bit) attribute set.
> +  */
> + if (kvm_pmu_pmc_is_chained(pmc) &&
> + kvm_pmu_pmc_is_high_counter(i)) {
> + kvm_pmu_create_perf_event(vcpu, i);
> + continue;
> + }
> +
> + pmc = kvm_pmu_get_canonical_pmc(pmc);

But pmc is already a canonical pmc, we don't need to call
kvm_pmu_get_canonical_pmc(). The condition above is the same as the one
used in kvm_pmu_get_canonical_pmc(), so no "non-canonical" pmc ever
reaches that point. I would understand putting a comment to clarify that
fact.
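
For illustration, the kind of comment I mean (wording is mine):

    pmc = &pmu->pmc[i];
    /*
     * pmc is already canonical here: high counters of a chained pair
     * were handled by the "continue" above.
     */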

>   if (pmc->perf_event) {
>   perf_event_enable(pmc->perf_event);
> - if (pmc->perf_event->state != PERF_EVENT_STA

Re: [PATCH v6 5/5] KVM: arm/arm64: support chained PMU counters

2019-05-15 Thread Julien Thierry
> vcpu.
>   */
> @@ -389,13 +547,20 @@ static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu 
> *vcpu, u64 select_idx)
>  static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
>  {
>   struct kvm_pmu *pmu = >arch.pmu;
> - struct kvm_pmc *pmc = >pmc[select_idx];
> + struct kvm_pmc *pmc;
>   struct perf_event *event;
>   struct perf_event_attr attr;
>   u64 eventsel, counter, reg, data;
>  
> - reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
> -   ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
> + /*
> +  * For chained counters the event type and filtering attributes are
> +  * obtained from the low/even counter. We also use this counter to
> +  * determine if the event is enabled/disabled.
> +  */
> + pmc = kvm_pmu_get_canonical_pmc(>pmc[select_idx]);
> +
> + reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
> +   ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
>   data = __vcpu_sys_reg(vcpu, reg);
>  
>   kvm_pmu_stop_counter(vcpu, pmc);
> @@ -403,34 +568,77 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu 
> *vcpu, u64 select_idx)
>  
>   /* Software increment event does't need to be backed by a perf event */
>   if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
> - select_idx != ARMV8_PMU_CYCLE_IDX)
> + pmc->idx != ARMV8_PMU_CYCLE_IDX)
>   return;
>  
>   memset(, 0, sizeof(struct perf_event_attr));
>   attr.type = PERF_TYPE_RAW;
>   attr.size = sizeof(attr);
>   attr.pinned = 1;
> - attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, select_idx);
> + attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
>   attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
>   attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
>   attr.exclude_hv = 1; /* Don't count EL2 events */
>   attr.exclude_host = 1; /* Don't count host events */
> - attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ?
> + attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ?
>   ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
>  
> - counter = kvm_pmu_get_counter_value(vcpu, select_idx);
> - /* The initial sample period (overflow count) of an event. */
> - attr.sample_period = (-counter) & pmc->bitmask;
> + counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
>  
> - event = perf_event_create_kernel_counter(, -1, current,
> + if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
> + /**
> +  * The initial sample period (overflow count) of an event. For
> +  * chained counters we only support overflow interrupts on the
> +  * high counter.
> +  */
> + attr.sample_period = (-counter) & 0xUL;
> + event = perf_event_create_kernel_counter(, -1, current,
> +  kvm_pmu_perf_overflow,
> +  pmc + 1);
> +
> + if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1))
> + attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
> + } else {
> + /* The initial sample period (overflow count) of an event. */
> + attr.sample_period = (-counter) & pmc->bitmask;
> + event = perf_event_create_kernel_counter(, -1, current,
>kvm_pmu_perf_overflow, pmc);
> + }
> +
>   if (IS_ERR(event)) {
>   pr_err_once("kvm: pmu event creation failed %ld\n",
>   PTR_ERR(event));
>   return;
>   }
>  
> - pmc->perf_event = event;
> + kvm_pmu_set_perf_event(pmc, event);

You set pmc to a canonical counter at the beginning of the function, so
you could do `pmc->perf_event = event`.

> +}
> +
> +/**
> + * kvm_pmu_update_pmc_chained - update chained bitmap
> + * @vcpu: The vcpu pointer
> + * @select_idx: The number of selected counter
> + *
> + * Update the chained bitmap based on the event type written in the
> + * typer register.
> + */
> +static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
> +{
> + struct kvm_pmu *pmu = >arch.pmu;
> + struct kvm_pmc *pmc = >pmc[select_idx];
> +
> + if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
> + /*
> +  * During promotion from !chained to chained we must ensure
> +  * the adjacent counter is stopped and its event destroyed
> +  */
> + if (!kvm_pmu_pmc_is_chained(pmc))
> + kvm_pmu_stop_counter(vcpu, pmc);
> +
> + set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
> + } else {
> + clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
> + }
>  }
>  
>  /**
> @@ -452,6 +660,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu 
> *vcpu, u64 data,
> ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
>  
>   __vcpu_sys_reg(vcpu, reg) = event_type;
> +
> + kvm_pmu_update_pmc_chained(vcpu, select_idx);
>   kvm_pmu_create_perf_event(vcpu, select_idx);
>  }
>  
> 

Thanks,

-- 
Julien Thierry


Re: [PATCH v5 5/5] KVM: arm/arm64: support chained PMU counters

2019-05-02 Thread Julien Thierry
 continue;
>  
> - pmc = >pmc[i];
> + pmc = kvm_pmu_get_canonical_pmc(>pmc[i]);

kvm_pmu_pmc_is_chained() works on both counters of a pair, and if we
have a non-canonical counter (i.e. high counter of a chain), we do a
"continue;", so I don't think we need to retrieve the canonical pmc here.

> +
> + /*
> +  * For high counters of chained events we must recreate the
> +  * perf event with the long (64bit) attribute set.
> +  */
> + if (kvm_pmu_pmc_is_chained(pmc) &&
> + kvm_pmu_pmc_is_high_counter(i)) {
> + kvm_pmu_create_perf_event(vcpu, i);
> + continue;
> + }
> +
>   if (pmc->perf_event) {
>   perf_event_enable(pmc->perf_event);
>   if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
> @@ -191,7 +320,18 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, 
> u64 val)
>   if (!(val & BIT(i)))
>   continue;
>  
> - pmc = >pmc[i];
> + pmc = kvm_pmu_get_canonical_pmc(>pmc[i]);

Same.

> +
> + /*
> +  * For high counters of chained events we must recreate the
> +  * perf event with the long (64bit) attribute unset.
> +  */
> + if (kvm_pmu_pmc_is_chained(pmc) &&
> + kvm_pmu_pmc_is_high_counter(i)) {
> + kvm_pmu_create_perf_event(vcpu, i);
> + continue;
> + }
> +
>   if (pmc->perf_event)
>   perf_event_disable(pmc->perf_event);
>   }
> @@ -281,17 +421,6 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
>   kvm_pmu_update_state(vcpu);
>  }
>  
> -static inline struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
> -{
> - struct kvm_pmu *pmu;
> - struct kvm_vcpu_arch *vcpu_arch;
> -
> - pmc -= pmc->idx;
> - pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
> - vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
> - return container_of(vcpu_arch, struct kvm_vcpu, arch);
> -}
> -
>  /**
>   * When the perf event overflows, set the overflow status and inform the 
> vcpu.
>   */
> @@ -389,11 +518,19 @@ static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu 
> *vcpu, u64 select_idx)
>  static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
>  {
>  struct kvm_pmu *pmu = &vcpu->arch.pmu;
> - struct kvm_pmc *pmc = &pmu->pmc[select_idx];
> + struct kvm_pmc *pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);
>   struct perf_event *event;
>   struct perf_event_attr attr;
>   u64 eventsel, counter, reg, data;
>  
> + /*
> +  * For chained counters the event type and filtering attributes are
> +  * obtained from the low/even counter. We also use this counter to
> +  * determine if the event is enabled/disabled.
> +  */
> + if (kvm_pmu_event_is_chained(vcpu, select_idx))
> + select_idx &= ~1UL;
> +

With both this and the pmc initialization it feels like we're doing
double the work/open-coding things.

You could delay the initialization of pmc until after select_idx has
been adjusted, i.e. something like the sketch below.
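
Only a sketch (same names as in this hunk), but I'm thinking of
something along these lines:

	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, counter, reg, data;

	/*
	 * For chained counters the event type and filtering attributes
	 * come from the low/even counter, so canonicalize the index
	 * before taking the pmc pointer.
	 */
	if (kvm_pmu_event_is_chained(vcpu, select_idx))
		select_idx &= ~1UL;

	pmc = &pmu->pmc[select_idx];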

>   reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
> ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
>   data = __vcpu_sys_reg(vcpu, reg);
> @@ -418,12 +555,28 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu 
> *vcpu, u64 select_idx)
>   attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ?
>   ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
>  
> - counter = kvm_pmu_get_counter_value(vcpu, select_idx);
> - /* The initial sample period (overflow count) of an event. */
> - attr.sample_period = (-counter) & pmc->bitmask;
> + counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
> +
> + if (kvm_pmu_event_is_chained(vcpu, pmc->idx)) {

Nit: At that point I feel like kvm_pmu_pmc_is_chained() is a simpler
operation. (If we update the evtype we call the create function again
after setting the pair bitmap anyway, right?)

Cheers,

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH kvmtool 05/16] ioport: pci: Move port allocations to PCI devices

2019-04-30 Thread Julien Thierry
Hi,

On 04/04/2019 14:45, Andre Przywara wrote:
> On Thu, 7 Mar 2019 08:36:06 +
> Julien Thierry  wrote:
> 
> Hi,
> 
>> The dynamic ioport allocation with IOPORT_EMPTY is currently only used
>> by PCI devices. Other devices use fixed ports for which they request
>> registration to the ioport API.
>>
>> PCI ports need to be in the PCI IO space and there is no reason ioport
>> API should know a PCI port is being allocated and needs to be placed in
>> PCI IO space. This currently just happens to be the case.
>>
>> Move the responsibility of dynamic allocation of ioports from the ioport
>> API to PCI.
>>
>> In the future, if other types of devices also need dynamic ioport
>> allocation, they'll have to figure out the range of ports they are
>> allowed to use.
>>
>> Signed-off-by: Julien Thierry 
>> ---
>>  hw/pci-shmem.c   |  3 ++-
>>  hw/vesa.c|  4 ++--
>>  include/kvm/ioport.h |  3 ---
>>  include/kvm/pci.h|  2 ++
>>  ioport.c | 18 --
>>  pci.c|  8 
>>  vfio/core.c  |  6 --
>>  virtio/pci.c |  3 ++-
>>  8 files changed, 20 insertions(+), 27 deletions(-)
>>
>> diff --git a/hw/pci-shmem.c b/hw/pci-shmem.c
>> index f92bc75..a0c5ba8 100644
>> --- a/hw/pci-shmem.c
>> +++ b/hw/pci-shmem.c
>> @@ -357,7 +357,8 @@ int pci_shmem__init(struct kvm *kvm)
>>  return 0;
>>  
>>  /* Register MMIO space for MSI-X */
>> -r = ioport__register(kvm, IOPORT_EMPTY, _pci__io_ops, 
>> IOPORT_SIZE, NULL);
>> +r = pci_get_io_port_block(IOPORT_SIZE);
>> +r = ioport__register(kvm, r, _pci__io_ops, IOPORT_SIZE, NULL);
>>  if (r < 0)
>>  return r;
>>  ivshmem_registers = (u16)r;
>> diff --git a/hw/vesa.c b/hw/vesa.c
>> index f3c5114..404a8a3 100644
>> --- a/hw/vesa.c
>> +++ b/hw/vesa.c
>> @@ -60,8 +60,8 @@ struct framebuffer *vesa__init(struct kvm *kvm)
>>  
>>  if (!kvm->cfg.vnc && !kvm->cfg.sdl && !kvm->cfg.gtk)
>>  return NULL;
>> -
>> -r = ioport__register(kvm, IOPORT_EMPTY, _io_ops, IOPORT_SIZE, 
>> NULL);
>> +r = pci_get_io_space_block(IOPORT_SIZE);
> 
> I am confused. This is still registering I/O ports, right? And this
> (misnamed) function is about MMIO?
> So should it read r = pci_get_io_port_block(IOPORT_SIZE); ?
> 
>> +r = ioport__register(kvm, r, _io_ops, IOPORT_SIZE, NULL);
>>  if (r < 0)
>>  return ERR_PTR(r);
>>  
>> diff --git a/include/kvm/ioport.h b/include/kvm/ioport.h
>> index db52a47..b10fcd5 100644
>> --- a/include/kvm/ioport.h
>> +++ b/include/kvm/ioport.h
>> @@ -14,11 +14,8 @@
>>  
>>  /* some ports we reserve for own use */
>>  #define IOPORT_DBG  0xe0
>> -#define IOPORT_START0x6200
>>  #define IOPORT_SIZE 0x400
>>  
>> -#define IOPORT_EMPTYUSHRT_MAX
>> -
>>  struct kvm;
>>  
>>  struct ioport {
>> diff --git a/include/kvm/pci.h b/include/kvm/pci.h
>> index a86c15a..bdbd183 100644
>> --- a/include/kvm/pci.h
>> +++ b/include/kvm/pci.h
>> @@ -19,6 +19,7 @@
>>  #define PCI_CONFIG_DATA 0xcfc
>>  #define PCI_CONFIG_BUS_FORWARD  0xcfa
>>  #define PCI_IO_SIZE 0x100
>> +#define PCI_IOPORT_START0x6200
>>  #define PCI_CFG_SIZE(1ULL << 24)
>>  
>>  struct kvm;
>> @@ -153,6 +154,7 @@ int pci__init(struct kvm *kvm);
>>  int pci__exit(struct kvm *kvm);
>>  struct pci_device_header *pci__find_dev(u8 dev_num);
>>  u32 pci_get_io_space_block(u32 size);
> 
> So I think this was already misnamed, but with your new function below
> becomes utterly confusing. Can we rename this to pci_get_mmio_space_block?

Yes, seems fair enough. I'll add a patch to rename that.

> 
>> +u16 pci_get_io_port_block(u32 size);
>>  void pci__assign_irq(struct device_header *dev_hdr);
>>  void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void 
>> *data, int size);
>>  void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void 
>> *data, int size);
>> diff --git a/ioport.c b/ioport.c
>> index a6dc65e..a72e403 100644
>> --- a/ioport.c
>> +++ b/ioport.c
>> @@ -16,24 +16,8 @@
>>  
>>  #define ioport_node(n) rb_entry(n, struct ioport, node)
>>  
>> -DEFINE_MUTEX(ioport_mutex);
>> -
>> -static u16 

Re: [PATCH v6 19/27] KVM: arm64: Enumerate SVE register indices for KVM_GET_REG_LIST

2019-03-28 Thread Julien Thierry



On 28/03/2019 16:48, Dave Martin wrote:
> On Thu, Mar 28, 2019 at 02:29:23PM +0000, Julien Thierry wrote:
>>
>>
>> On 28/03/2019 12:27, Dave Martin wrote:
>>> On Wed, Mar 27, 2019 at 03:21:02PM +, Julien Thierry wrote:
>>>>
>>>>
>>>> On 27/03/2019 10:33, Dave Martin wrote:
> 
> [...]
> 
>>>>>   return slices;
>>>>> }
>>>>>
>>>>> This may be clearer, but felt a bit like overkill...
>>>>>
>>>>> Thoughts?
>>>>
>>>> Seems a bit overkill yes... I was more thinking of a define and the
>>>> person in charge of adding the slice support would just need to look for
>>>> references to that define to know (some of) the places that would need
>>>> rework/review.
>>>>
>>>> So, unless someone else thinks it's good to introduce it right now you
>>>> can ignore that.
>>>
>>> OK, how about the following?  This keeps things minimal, but should help
>>> future maintainers know that something may need updating here in the
>>> future. 
>>>
>>
>> Yes, I think this looks good.
> 
> OK, are you happy for me to keep your Reviewed-by with that change?
> 

Yes, please do!

Thanks,

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v6 19/27] KVM: arm64: Enumerate SVE register indices for KVM_GET_REG_LIST

2019-03-28 Thread Julien Thierry



On 28/03/2019 12:27, Dave Martin wrote:
> On Wed, Mar 27, 2019 at 03:21:02PM +0000, Julien Thierry wrote:
>>
>>
>> On 27/03/2019 10:33, Dave Martin wrote:
>>> On Wed, Mar 27, 2019 at 09:47:42AM +, Julien Thierry wrote:
>>>> Hi Dave,
>>>>
>>>> On 19/03/2019 17:52, Dave Martin wrote:
> 
> [...]
> 
>>>>> +static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu)
>>>>> +{
>>>>> + /* Only the first slice ever exists, for now */
>>>>> + const unsigned int slices = 1;
>>>>
>>>> Nit: Might be worth introducing a macro/inline function for the number
>>>> of slices supported. This way, the day we need to change that, we only
>>>> need to look for that identifier.
>>>
>>> ... Reasonable point, but I wanted to avoid inventing anything
>>> prematurely, partly because sve_reg_to_region() will need work in order
>>> to support multiple slices (though it's not rocket science).
>>>
>>> I could introduce something like the following:
>>>
>>> static unsigned int sve_num_slices(const struct kvm_vcpu *vcpu)
>>> {
>>> unsigned int slice_size = KVM_REG_SIZE(KVM_REG_ARM64_SVE_ZREG(0, 0));
>>> unsigned int slices = DIV_ROUND_UP(vcpu->arch.sve_max_vl, slice_size);
>>>
>>> /*
>>>  * For now, the SVE register ioctl access code won't work
>>>  * properly with multiple register slices.  KVM should prevent
>>>  * configuration of a vcpu with a maximum vector length large
>>>  * enough to trigger this:
>>>  */
>>> if (WARN_ON_ONCE(slices > 1))
>>> return 1;
>>>
>>> return slices;
>>> }
>>>
>>> This may be clearer, but felt a bit like overkill...
>>>
>>> Thoughts?
>>
>> Seems a bit overkill yes... I was more thinking of a define and the
>> person in charge of adding the slice support would just need to look for
>> references to that define to know (some of) the places that would need
>> rework/review.
>>
>> So, unless someone else thinks it's good to introduce it right now you
>> can ignore that.
> 
> OK, how about the following?  This keeps things minimal, but should help
> future maintainers know that something may need updating here in the
> future. 
> 

Yes, I think this looks good.

Thanks,

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v6 21/27] KVM: arm/arm64: Add hook for arch-specific KVM initialisation

2019-03-27 Thread Julien Thierry



On 27/03/2019 10:41, Dave Martin wrote:
> On Wed, Mar 27, 2019 at 10:07:17AM +0000, Julien Thierry wrote:
>> Hi Dave,
>>
>> On 19/03/2019 17:52, Dave Martin wrote:
>>> This patch adds a kvm_arm_init_arch_resources() hook to perform
>>> subarch-specific initialisation when starting up KVM.
>>>
>>> This will be used in a subsequent patch for global SVE-related
>>> setup on arm64.
>>>
>>> No functional change.
>>>
>>> Signed-off-by: Dave Martin 
>>> ---
>>>  arch/arm/include/asm/kvm_host.h   | 2 ++
>>>  arch/arm64/include/asm/kvm_host.h | 2 ++
>>>  virt/kvm/arm/arm.c| 4 
>>>  3 files changed, 8 insertions(+)
>>>
>>> diff --git a/arch/arm/include/asm/kvm_host.h 
>>> b/arch/arm/include/asm/kvm_host.h
>>> index 770d732..a49ee01 100644
>>> --- a/arch/arm/include/asm/kvm_host.h
>>> +++ b/arch/arm/include/asm/kvm_host.h
>>> @@ -53,6 +53,8 @@
>>>  
>>>  DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>>>  
>>> +static inline int kvm_arm_init_arch_resources(void) { return 0; }
>>> +
>>>  u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
>>>  int __attribute_const__ kvm_target_cpu(void);
>>>  int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
>>> diff --git a/arch/arm64/include/asm/kvm_host.h 
>>> b/arch/arm64/include/asm/kvm_host.h
>>> index 205438a..3e89509 100644
>>> --- a/arch/arm64/include/asm/kvm_host.h
>>> +++ b/arch/arm64/include/asm/kvm_host.h
>>> @@ -58,6 +58,8 @@
>>>  
>>>  DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>>>  
>>> +static inline int kvm_arm_init_arch_resources(void) { return 0; }
>>> +
>>>  int __attribute_const__ kvm_target_cpu(void);
>>>  int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
>>>  int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
>>> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
>>> index 99c3738..c69e137 100644
>>> --- a/virt/kvm/arm/arm.c
>>> +++ b/virt/kvm/arm/arm.c
>>> @@ -1664,6 +1664,10 @@ int kvm_arch_init(void *opaque)
>>> if (err)
>>> return err;
>>>  
>>> +   err = kvm_arm_init_arch_resources();
>>> +   if (err)
>>> +   return err;
>>> +
>>
>> Nit: Does this need to be the very first thing we do for arch
>> initialization?
>>
>> In the same function I see a call to init_common_resources(), so I
>> would've pictured kvm_arm_init_arch_resources() being called close to it
>> (either right before or right after).
> 
> With git format-patch -U4 (so, one extra line of context):
> 
> @@ -1663,8 +1663,12 @@ int kvm_arch_init(void *opaque)
> err = init_common_resources();
> if (err)
> return err;
> 
> +   err = kvm_arm_init_arch_resources();
> +   if (err)
> +   return err;
> +
> 
> Does that answer your concern?
> 

Ah yes, sorry for the noise! Disregard my comment :) .

Thanks,

Julien

> I'm guessing we might at some point find we have to move this call if we
> add more code into kvm_arm_init_arch_resources(), but for now there is
> no dependency between the SVE init stuff and anything else here.  So
> it doesn't matter exactly when we call it today, so long as it is
> called before anyone can start creating vcpus.
> 
>> Otherwise:
>>
>> Reviewed-by: Julien Thierry 
> 
> I'll wait for your response before applying this.
> 
> Thanks
> ---Dave
> 

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v6 19/27] KVM: arm64: Enumerate SVE register indices for KVM_GET_REG_LIST

2019-03-27 Thread Julien Thierry



On 27/03/2019 10:33, Dave Martin wrote:
> On Wed, Mar 27, 2019 at 09:47:42AM +0000, Julien Thierry wrote:
>> Hi Dave,
>>
>> On 19/03/2019 17:52, Dave Martin wrote:
>>> This patch includes the SVE register IDs in the list returned by
>>> KVM_GET_REG_LIST, as appropriate.
>>>
>>> On a non-SVE-enabled vcpu, no new IDs are added.
>>>
>>> On an SVE-enabled vcpu, IDs for the FPSIMD V-registers are removed
>>> from the list, since userspace is required to access the Z-
>>> registers instead in order to access the V-register content.  For
>>> the variably-sized SVE registers, the appropriate set of slice IDs
>>> are enumerated, depending on the maximum vector length for the
>>> vcpu.
>>>
>>> As it currently stands, the SVE architecture never requires more
>>> than one slice to exist per register, so this patch adds no
>>> explicit support for enumerating multiple slices.  The code can be
>>> extended straightforwardly to support this in the future, if
>>> needed.
>>>
>>> Signed-off-by: Dave Martin 
>>>
>>
>> Reviewed-by: Julien Thierry 
> 
> Thanks, although...
> 
>>> ---
>>>
>>> Changes since v5:
>>>
>>> (Dropped Julien Thierry's Reviewed-by due to non-trivial rebasing)
>>>
>>>  * Move mis-split reword to prevent put_user()s being accidentally the
>>>correct size from KVM: arm64/sve: Add pseudo-register for the guest's
>>>vector lengths.
>>> ---
>>>  arch/arm64/kvm/guest.c | 56 
>>> ++
>>>  1 file changed, 56 insertions(+)
>>>
>>> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
>>> index 736d8cb..585c31e5 100644
>>> --- a/arch/arm64/kvm/guest.c
>>> +++ b/arch/arm64/kvm/guest.c
>>> @@ -411,6 +411,56 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const 
>>> struct kvm_one_reg *reg)
>>> return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
>>>  }
>>>  
>>> +static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu)
>>> +{
>>> +   /* Only the first slice ever exists, for now */
>>> +   const unsigned int slices = 1;
>>
>> Nit: Might be worth introducing a macro/inline function for the number
>> of slices supported. This way, the day we need to change that, we only
>> need to look for that identifier.
> 
> ... Reasonable point, but I wanted to avoid inventing anything
> prematurely, partly because sve_reg_to_region() will need work in order
> to support multiple slices (though it's not rocket science).
> 
> I could introduce something like the following:
> 
> static unsigned int sve_num_slices(const struct kvm_vcpu *vcpu)
> {
>   unsigned int slice_size = KVM_REG_SIZE(KVM_REG_ARM64_SVE_ZREG(0, 0));
>   unsigned int slices = DIV_ROUND_UP(vcpu->arch.sve_max_vl, slice_size);
> 
>   /*
>* For now, the SVE register ioctl access code won't work
>* properly with multiple register slices.  KVM should prevent
>* configuration of a vcpu with a maximum vector length large
>    * enough to trigger this:
>*/
>   if (WARN_ON_ONCE(slices > 1))
>   return 1;
> 
>   return slices;
> }
> 
> This may be clearer, but felt a bit like overkill...
> 
> Thoughts?

Seems a bit overkill yes... I was more thinking of a define and the
person in charge of adding the slice support would just need to look for
references to that define to know (some of) the places that would need
rework/review.

So, unless someone else thinks it's good to introduce it right now you
can ignore that.

Thanks,

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v6 25/27] KVM: arm64: Add a capability to advertise SVE support

2019-03-27 Thread Julien Thierry
Hi Dave,

On 19/03/2019 17:52, Dave Martin wrote:
> To provide a uniform way to check for KVM SVE support amongst other
> features, this patch adds a suitable capability KVM_CAP_ARM_SVE,
> and reports it as present when SVE is available.
> 
> Signed-off-by: Dave Martin 
> 

Reviewed-by: Julien Thierry 

Cheers,

Julien

> ---
> 
> Changes since v5:
> 
>  * [Julien Thierry] Strip out has_vhe() sanity-check, which wasn't in
>the most logical place, and anyway doesn't really belong in this
>patch.
> 
>Moved to KVM: arm64/sve: Allow userspace to enable SVE for vcpus
>instead.
> ---
>  arch/arm64/kvm/reset.c   | 3 +++
>  include/uapi/linux/kvm.h | 1 +
>  2 files changed, 4 insertions(+)
> 
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index 4f04dbf..180d7a5 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -98,6 +98,9 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long 
> ext)
>   case KVM_CAP_ARM_VM_IPA_SIZE:
>   r = kvm_ipa_limit;
>   break;
> + case KVM_CAP_ARM_SVE:
> + r = system_supports_sve();
> + break;
>   default:
>   r = 0;
>   }
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index c3b8e7a..1d56444 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -988,6 +988,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_ARM_VM_IPA_SIZE 165
>  #define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166
>  #define KVM_CAP_HYPERV_CPUID 167
> +#define KVM_CAP_ARM_SVE 168
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> 

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v6 24/27] KVM: arm64/sve: Allow userspace to enable SVE for vcpus

2019-03-27 Thread Julien Thierry
Hi Dave,

On 19/03/2019 17:52, Dave Martin wrote:
> Now that all the pieces are in place, this patch offers a new flag
> KVM_ARM_VCPU_SVE that userspace can pass to KVM_ARM_VCPU_INIT to
> turn on SVE for the guest, on a per-vcpu basis.
> 
> As part of this, support for initialisation and reset of the SVE
> vector length set and registers is added in the appropriate places,
> as well as finally setting the KVM_ARM64_GUEST_HAS_SVE vcpu flag,
> to turn on the SVE support code.
> 
> Allocation of the SVE register storage in vcpu->arch.sve_state is
> deferred until the SVE configuration is finalized, by which time
> the size of the registers is known.
> 
> Setting the vector lengths supported by the vcpu is considered
> configuration of the emulated hardware rather than runtime
> configuration, so no support is offered for changing the vector
> lengths available to an existing vcpu across reset.
> 
> Signed-off-by: Dave Martin 
> 

Reviewed-by: Julien Thierry 

Cheers,

Julien

> ---
> 
> Changes since v5:
> 
>  * Refactored to make the code flow clearer and clarify responsibility
>for the various initialisation phases/checks.
> 
>In place of the previous, confusingly dual-purpose kvm_reset_sve(),
>enabling and resetting of SVE are split into separate functions and
>called as appropriate from kvm_reset_vcpu().
> 
>To avoid interactions with preempt_disable(), memory allocation is
>done in the kvm_vcpu_first_run_init() path instead.  To achieve
>this, the SVE memory allocation is moved to kvm_arm_vcpu_finalize(),
>which now takes on the role of actually doing deferred setup instead
>of just setting a flag to indicate that the setup was done.
> 
>  * Add has_vhe() sanity-check into kvm_vcpu_enable_sve(), since it
>makes more sense here than when resetting the vcpu.
> 
>  * When checking for SVE finalization in kvm_reset_vcpu(), call the new
>SVE-specific function kvm_arm_vcpu_sve_finalized().  The new generic
>check kvm_arm_vcpu_is_finalized() is unnecessarily broad here: using
>the appropriate specific check makes the code more self-describing.
> 
>  * Definition of KVM_ARM_VCPU_SVE moved to KVM: arm64/sve: Add pseudo-
>register for the guest's vector lengths (which needs it for the
>KVM_ARM_VCPU_FINALIZE ioctl).
> ---
>  arch/arm64/include/asm/kvm_host.h |  3 +--
>  arch/arm64/kvm/reset.c| 45 
> ++-
>  2 files changed, 45 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 5475cc4..9d57cf8 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -49,8 +49,7 @@
>  
>  #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
>  
> -/* Will be incremented when KVM_ARM_VCPU_SVE is fully implemented: */
> -#define KVM_VCPU_MAX_FEATURES 4
> +#define KVM_VCPU_MAX_FEATURES 5
>  
>  #define KVM_REQ_SLEEP \
>   KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index e7f9c06..4f04dbf 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -20,10 +20,12 @@
>   */
>  
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include 
> @@ -37,6 +39,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  /* Maximum phys_shift supported for any VM on this host */
>  static u32 kvm_ipa_limit;
> @@ -130,6 +133,27 @@ int kvm_arm_init_arch_resources(void)
>   return 0;
>  }
>  
> +static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
> +{
> + if (!system_supports_sve())
> + return -EINVAL;
> +
> + /* Verify that KVM startup enforced this when SVE was detected: */
> + if (WARN_ON(!has_vhe()))
> + return -EINVAL;
> +
> + vcpu->arch.sve_max_vl = kvm_sve_max_vl;
> +
> + /*
> +  * Userspace can still customize the vector lengths by writing
> +  * KVM_REG_ARM64_SVE_VLS.  Allocation is deferred until
> +  * kvm_arm_vcpu_finalize(), which freezes the configuration.
> +  */
> + vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;
> +
> + return 0;
> +}
> +
>  /*
>   * Finalize vcpu's maximum SVE vector length, allocating
>   * vcpu->arch.sve_state as necessary.
> @@ -188,13 +212,20 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
>   kfree(vcpu->arch.sve_state);
>  }
>  
> +static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
> +{
> + if (vcpu_has_sve(vcpu))
> + memset(vcpu->arch.sve_s

Re: [PATCH v6 23/27] KVM: arm64/sve: Add pseudo-register for the guest's vector lengths

2019-03-27 Thread Julien Thierry
Hi Dave,

On 19/03/2019 17:52, Dave Martin wrote:
> This patch adds a new pseudo-register KVM_REG_ARM64_SVE_VLS to
> allow userspace to set and query the set of vector lengths visible
> to the guest.
> 
> In the future, multiple register slices per SVE register may be
> visible through the ioctl interface.  Once the set of slices has
> been determined we would not be able to allow the vector length set
> to be changed any more, in order to avoid userspace seeing
> inconsistent sets of registers.  For this reason, this patch adds
> support for explicit finalization of the SVE configuration via the
> KVM_ARM_VCPU_FINALIZE ioctl.
> 
> Finalization is the proper place to allocate the SVE register state
> storage in vcpu->arch.sve_state, so this patch adds that as
> appropriate.  The data is freed via kvm_arch_vcpu_uninit(), which
> was previously a no-op on arm64.
> 
> To simplify the logic for determining what vector lengths can be
> supported, some code is added to KVM init to work this out, in the
> kvm_arm_init_arch_resources() hook.
> 
> The KVM_REG_ARM64_SVE_VLS pseudo-register is not exposed yet.
> Subsequent patches will allow SVE to be turned on for guest vcpus,
> making it visible.
> 
> Signed-off-by: Dave Martin 
> 
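
For my own reference, my understanding of the intended userspace usage
is roughly the following (only a sketch based on my reading of the
patch: I'm assuming the 512-bit / 8 x u64 layout of the pseudo-register,
and vcpu_fd is made up):

	__u64 vls[8];
	struct kvm_one_reg reg = {
		.id	= KVM_REG_ARM64_SVE_VLS,
		.addr	= (__u64)vls,
	};

	/* Read the set of vector lengths KVM can offer... */
	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);

	/* ...clear any vector lengths the guest should not see... */

	/* ...write the result back, then finalize the vcpu configuration. */
	ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);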

Reviewed-by: Julien Thierry 

Cheers,

Julien

> ---
> 
> Changes since v5:
> 
>  * [Julien Thierry] Delete overzealous BUILD_BUG_ON() checks.
>It also turns out that these could cause kernel build failures in
>some configurations, even though the checked condition is compile-
>time constant.
> 
>Because of the way the affected functions are called, the checks
>are superfluous, so the simplest option is simply to get rid of
>them.
> 
>  * [Julien Thierry] Free vcpu->arch.sve_state (if any) in
>kvm_arch_vcpu_uninit() (which is currently a no-op).
> 
>This was accidentally lost during a previous rebase.
> 
>  * Add kvm_arm_init_arch_resources() hook, and use it to probe SVE
>configuration for KVM, to avoid duplicating the logic elsewhere.
>We only need to do this once.
> 
>  * Move sve_state buffer allocation to kvm_arm_vcpu_finalize().
> 
>As well as making the code more straightforward, this avoids the
>need to allocate memory in kvm_reset_vcpu(), the meat of which is
>non-preemptible since commit 358b28f09f0a ("arm/arm64: KVM: Allow a
>VCPU to fully reset itself").
> 
>The refactoring means that if this has not been done by the time
>we hit KVM_RUN, then this allocation will happen on the
>kvm_arm_first_run_init() path, where preemption remains enabled.
> 
>  * Add a couple of comments in {get,set}_sve_reg() to make the handling
>of the KVM_REG_ARM64_SVE_VLS special case a little clearer.
> 
>  * Move mis-split rework to avoid put_user() being the correct size
>by accident in KVM_GET_REG_LIST to KVM: arm64: Enumerate SVE register
>indices for KVM_GET_REG_LIST.
> 
>  * Fix wrong WARN_ON() check sense when checking whether the
>implementation may need more SVE register slices than KVM can
>support.
> 
>  * Fix erroneous setting of vcpu->arch.sve_max_vl based on stale loop
>control variable vq.
> 
>  * Move definition of KVM_ARM_VCPU_SVE from KVM: arm64/sve: Allow
>userspace to enable SVE for vcpus.
> 
>  * Migrate to explicit finalization of the SVE configuration, using
>KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE).
> ---
>  arch/arm64/include/asm/kvm_host.h |  15 +++--
>  arch/arm64/include/uapi/asm/kvm.h |   5 ++
>  arch/arm64/kvm/guest.c| 114 
> +-
>  arch/arm64/kvm/reset.c|  89 +
>  4 files changed, 215 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 98658f7..5475cc4 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -23,7 +23,6 @@
>  #define __ARM64_KVM_HOST_H__
>  
>  #include 
> -#include 
>  #include 
>  #include 
>  #include 
> @@ -50,6 +49,7 @@
>  
>  #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
>  
> +/* Will be incremented when KVM_ARM_VCPU_SVE is fully implemented: */
>  #define KVM_VCPU_MAX_FEATURES 4
>  
>  #define KVM_REQ_SLEEP \
> @@ -59,10 +59,12 @@
>  
>  DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>  
> -static inline int kvm_arm_init_arch_resources(void) { return 0; }
> +extern unsigned int kvm_sve_max_vl;
> +int kvm_arm_init_arch_resources(void);
>  
>  int __attribute_const__ kvm_target_cpu(void);
>  int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
> +void kvm_arch_vcpu_uninit(struct kvm_vcpu *

Re: [PATCH v6 22/27] KVM: arm/arm64: Add KVM_ARM_VCPU_FINALIZE ioctl

2019-03-27 Thread Julien Thierry
Hi Dave,

On 19/03/2019 17:52, Dave Martin wrote:
> Some aspects of vcpu configuration may be too complex to be
> completed inside KVM_ARM_VCPU_INIT.  Thus, there may be a
> requirement for userspace to do some additional configuration
> before various other ioctls will work in a consistent way.
> 
> In particular this will be the case for SVE, where userspace will
> need to negotiate the set of vector lengths to be made available to
> the guest before the vcpu becomes fully usable.
> 
> In order to provide an explicit way for userspace to confirm that
> it has finished setting up a particular vcpu feature, this patch
> adds a new ioctl KVM_ARM_VCPU_FINALIZE.
> 
> When userspace has opted into a feature that requires finalization,
> typically by means of a feature flag passed to KVM_ARM_VCPU_INIT, a
> matching call to KVM_ARM_VCPU_FINALIZE is now required before
> KVM_RUN or KVM_GET_REG_LIST is allowed.  Individual features may
> impose additional restrictions where appropriate.
> 
> No existing vcpu features are affected by this, so current
> userspace implementations will continue to work exactly as before,
> with no need to issue KVM_ARM_VCPU_FINALIZE.
> 
> As implemented in this patch, KVM_ARM_VCPU_FINALIZE is currently a
> placeholder: no finalizable features exist yet, so ioctl is not
> required and will always yield EINVAL.  Subsequent patches will add
> the finalization logic to make use of this ioctl for SVE.
> 
> No functional change for existing userspace.
> 
> Signed-off-by: Dave Martin 
> 
> ---
> 
> Changes since v5:
> 
>  * Commit message, including subject line, rewritten.
> 
>This patch is a rework of "KVM: arm/arm64: Add hook to finalize the
>vcpu configuration".  The old subject line and commit message no
>longer accurately described what the patch does.  However, the code
>is an evolution of the previous patch rather than a wholesale
>rewrite.
> 
>  * Added an explicit KVM_ARM_VCPU_FINALIZE ioctl, rather than just
>providing internal hooks in the kernel to finalize the vcpu
>configuration implicitly.  This allows userspace to confirm exactly
>when it has finished configuring the vcpu and is ready to use it.
> 
>This results in simpler (and hopefully more maintainable) ioctl
>ordering rules.
> ---
>  arch/arm/include/asm/kvm_host.h   |  4 
>  arch/arm64/include/asm/kvm_host.h |  4 
>  include/uapi/linux/kvm.h  |  3 +++
>  virt/kvm/arm/arm.c| 18 ++
>  4 files changed, 29 insertions(+)
> 
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index a49ee01..e80cfc1 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -19,6 +19,7 @@
>  #ifndef __ARM_KVM_HOST_H__
>  #define __ARM_KVM_HOST_H__
>  
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -411,4 +412,7 @@ static inline int kvm_arm_setup_stage2(struct kvm *kvm, 
> unsigned long type)
>   return 0;
>  }
>  
> +#define kvm_arm_vcpu_finalize(vcpu, what) (-EINVAL)
> +#define kvm_arm_vcpu_is_finalized(vcpu) true
> +
>  #endif /* __ARM_KVM_HOST_H__ */
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 3e89509..98658f7 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -23,6 +23,7 @@
>  #define __ARM64_KVM_HOST_H__
>  
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -625,4 +626,7 @@ void kvm_arch_free_vm(struct kvm *kvm);
>  
>  int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
>  
> +#define kvm_arm_vcpu_finalize(vcpu, what) (-EINVAL)
> +#define kvm_arm_vcpu_is_finalized(vcpu) true

I had a bit of hesitation about having a per-feature ioctl call, but in
the end this seems simple enough: existing guests (which don't make the
ioctl call) keep working, and checking that the necessary features have
been finalized is also pretty straightforward.
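
For the record, my mental model of the resulting userspace flow is
roughly the following (only a sketch: the SVE feature flag and the
vector length configuration come from later patches in the series, and
vcpu_fd is just whatever KVM_CREATE_VCPU returned):

	struct kvm_vcpu_init init = { /* target, KVM_ARM_VCPU_SVE set in features[] */ };
	int feature = KVM_ARM_VCPU_SVE;

	/* Opt in to the feature... */
	ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);

	/* ...do any feature-specific configuration (e.g. vector lengths)... */

	/* ...then confirm it before KVM_RUN or KVM_GET_REG_LIST are allowed. */
	ioctl(vcpu_fd, KVM_ARM_VCPU_FINALIZE, &feature);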

FWIW:

Reviewed-by: Julien Thierry 

Thanks,

Julien

> +
>  #endif /* __ARM64_KVM_HOST_H__ */
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index dc77a5a..c3b8e7a 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1441,6 +1441,9 @@ struct kvm_enc_region {
>  /* Available with KVM_CAP_HYPERV_CPUID */
>  #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2)
>  
> +/* Available with KVM_CAP_ARM_SVE */
> +#define KVM_ARM_VCPU_FINALIZE  _IOW(KVMIO,  0xc2, int)
> +
>  /* Secure Encrypted Virtualization command */
>  enum sev_cmd_id {
>   /* Guest initialization commands */
> diff --git a/v

Re: [PATCH v6 21/27] KVM: arm/arm64: Add hook for arch-specific KVM initialisation

2019-03-27 Thread Julien Thierry
Hi Dave,

On 19/03/2019 17:52, Dave Martin wrote:
> This patch adds a kvm_arm_init_arch_resources() hook to perform
> subarch-specific initialisation when starting up KVM.
> 
> This will be used in a subsequent patch for global SVE-related
> setup on arm64.
> 
> No functional change.
> 
> Signed-off-by: Dave Martin 
> ---
>  arch/arm/include/asm/kvm_host.h   | 2 ++
>  arch/arm64/include/asm/kvm_host.h | 2 ++
>  virt/kvm/arm/arm.c| 4 
>  3 files changed, 8 insertions(+)
> 
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 770d732..a49ee01 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -53,6 +53,8 @@
>  
>  DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>  
> +static inline int kvm_arm_init_arch_resources(void) { return 0; }
> +
>  u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
>  int __attribute_const__ kvm_target_cpu(void);
>  int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 205438a..3e89509 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -58,6 +58,8 @@
>  
>  DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>  
> +static inline int kvm_arm_init_arch_resources(void) { return 0; }
> +
>  int __attribute_const__ kvm_target_cpu(void);
>  int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
>  int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext);
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index 99c3738..c69e137 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -1664,6 +1664,10 @@ int kvm_arch_init(void *opaque)
>   if (err)
>   return err;
>  
> + err = kvm_arm_init_arch_resources();
> + if (err)
> + return err;
> +

Nit: Does this need to be the very first thing we do for arch
initialization?

In the same function I see a call to init_common_resources(), so I
would've pictured kvm_arm_init_arch_resources() being called close to it
(either right before or right after).

Otherwise:

Reviewed-by: Julien Thierry 

Cheers,

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v6 18/27] KVM: arm64/sve: Add SVE support to register access ioctl interface

2019-03-27 Thread Julien Thierry
Hi Dave,

On 19/03/2019 17:52, Dave Martin wrote:
> This patch adds the following registers for access via the
> KVM_{GET,SET}_ONE_REG interface:
> 
>  * KVM_REG_ARM64_SVE_ZREG(n, i) (n = 0..31) (in 2048-bit slices)
>  * KVM_REG_ARM64_SVE_PREG(n, i) (n = 0..15) (in 256-bit slices)
>  * KVM_REG_ARM64_SVE_FFR(i) (in 256-bit slices)
> 
> In order to adapt gracefully to future architectural extensions,
> the registers are logically divided up into slices as noted above:
> the i parameter denotes the slice index.
> 
> This allows us to reserve space in the ABI for future expansion of
> these registers.  However, as of today the architecture does not
> permit registers to be larger than a single slice, so no code is
> needed in the kernel to expose additional slices, for now.  The
> code can be extended later as needed to expose them up to a maximum
> of 32 slices (as carved out in the architecture itself) if they
> really exist someday.
> 
> The registers are only visible for vcpus that have SVE enabled.
> They are not enumerated by KVM_GET_REG_LIST on vcpus that do not
> have SVE.
> 
> Accesses to the FPSIMD registers via KVM_REG_ARM_CORE is not
> allowed for SVE-enabled vcpus: SVE-aware userspace can use the
> KVM_REG_ARM64_SVE_ZREG() interface instead to access the same
> register state.  This avoids some complex and pointless emulation
> in the kernel to convert between the two views of these aliased
> registers.
> 
> Signed-off-by: Dave Martin 
> 

Maybe it's because I already had reviewed the previous iteration, but
this time things do seem a bit clearer.
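
For reference, the way these IDs get used from userspace ends up being
something like this (just a sketch: vcpu_fd and the buffer are made up,
the macro is the one added to the uapi header in this patch):

	__u8 zreg[256];	/* one 2048-bit slice of a Z-register */
	struct kvm_one_reg reg = {
		.id	= KVM_REG_ARM64_SVE_ZREG(0, 0),	/* Z0, slice 0 */
		.addr	= (__u64)zreg,
	};

	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);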

Reviewed-by: Julien Thierry 

Thanks,

Julien

> ---
> 
> Changes since v5:
> 
>  * [Julien Thierry] rename sve_reg_region() to sve_reg_to_region() to
>make its purpose a bit clearer.
> 
>  * [Julien Thierry] rename struct sve_state_region to
>sve_state_reg_region to make it clearer this this struct only
>describes the bounds of (part of) a single register within
>sve_state.
> 
>  * [Julien Thierry] Add a comment to clarify the purpose of struct
>sve_state_reg_region.
> ---
>  arch/arm64/include/asm/kvm_host.h |  14 
>  arch/arm64/include/uapi/asm/kvm.h |  17 +
>  arch/arm64/kvm/guest.c| 139 
> ++
>  3 files changed, 158 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 4fabfd2..205438a 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -329,6 +329,20 @@ struct kvm_vcpu_arch {
>  #define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
> sve_ffr_offset((vcpu)->arch.sve_max_vl)))
>  
> +#define vcpu_sve_state_size(vcpu) ({ \
> + size_t __size_ret;  \
> + unsigned int __vcpu_vq; \
> + \
> + if (WARN_ON(!sve_vl_valid((vcpu)->arch.sve_max_vl))) {  \
> + __size_ret = 0; \
> + } else {\
> + __vcpu_vq = sve_vq_from_vl((vcpu)->arch.sve_max_vl);\
> + __size_ret = SVE_SIG_REGS_SIZE(__vcpu_vq);  \
> + }   \
> + \
> + __size_ret; \
> +})
> +
>  /* vcpu_arch flags field values: */
>  #define KVM_ARM64_DEBUG_DIRTY(1 << 0)
>  #define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */
> diff --git a/arch/arm64/include/uapi/asm/kvm.h 
> b/arch/arm64/include/uapi/asm/kvm.h
> index 97c3478..ced760c 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -226,6 +226,23 @@ struct kvm_vcpu_events {
>KVM_REG_ARM_FW | ((r) & 0xffff))
>  #define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0)
>  
> +/* SVE registers */
> +#define KVM_REG_ARM64_SVE(0x15 << KVM_REG_ARM_COPROC_SHIFT)
> +
> +/* Z- and P-regs occupy blocks at the following offsets within this range: */
> +#define KVM_REG_ARM64_SVE_ZREG_BASE  0
> +#define KVM_REG_ARM64_SVE_PREG_BASE  0x400
> +
> +#define KVM_REG_ARM64_SVE_ZREG(n, i) (KVM_REG_ARM64 | KVM_REG_ARM64_SVE | \
> +  KVM_REG_ARM64_SVE_ZREG_BASE |  \
> + 

Re: [PATCH v6 17/27] KVM: arm64: Reject ioctl access to FPSIMD V-regs on SVE vcpus

2019-03-27 Thread Julien Thierry
Hi Dave,

On 19/03/2019 17:52, Dave Martin wrote:
> In order to avoid the pointless complexity of maintaining two ioctl
> register access views of the same data, this patch blocks ioctl
> access to the FPSIMD V-registers on vcpus that support SVE.
> 
> This will make it more straightforward to add SVE register access
> support.
> 
> Since SVE is an opt-in feature for userspace, this will not affect
> existing users.
> 
> Signed-off-by: Dave Martin 
> 

Reviewed-by: Julien Thierry 

Cheers,

Julien

> ---
> 
> (Julien Thierry's Reviewed-by dropped due to non-trivial refactoring)
> 
> Changes since v5:
> 
>  * Refactored to cope with the removal of core_reg_size_from_offset()
>(which was added by another series which will now be handled
>independently).
> 
>This leaves some duplication in that we still filter the V-regs out
>in two places, but this no worse than other existing code in guest.c.
>I plan to tidy this up independently later on.
> ---
>  arch/arm64/kvm/guest.c | 48 
>  1 file changed, 36 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
> index a391a61..756d0d6 100644
> --- a/arch/arm64/kvm/guest.c
> +++ b/arch/arm64/kvm/guest.c
> @@ -54,12 +54,19 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>   return 0;
>  }
>  
> +static bool core_reg_offset_is_vreg(u64 off)
> +{
> + return off >= KVM_REG_ARM_CORE_REG(fp_regs.vregs) &&
> + off < KVM_REG_ARM_CORE_REG(fp_regs.fpsr);
> +}
> +
>  static u64 core_reg_offset_from_id(u64 id)
>  {
>   return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
>  }
>  
> -static int validate_core_offset(const struct kvm_one_reg *reg)
> +static int validate_core_offset(const struct kvm_vcpu *vcpu,
> + const struct kvm_one_reg *reg)
>  {
>   u64 off = core_reg_offset_from_id(reg->id);
>   int size;
> @@ -91,11 +98,19 @@ static int validate_core_offset(const struct kvm_one_reg 
> *reg)
>   return -EINVAL;
>   }
>  
> - if (KVM_REG_SIZE(reg->id) == size &&
> - IS_ALIGNED(off, size / sizeof(__u32)))
> - return 0;
> + if (KVM_REG_SIZE(reg->id) != size ||
> + !IS_ALIGNED(off, size / sizeof(__u32)))
> + return -EINVAL;
>  
> - return -EINVAL;
> + /*
> +  * The KVM_REG_ARM64_SVE regs must be used instead of
> +  * KVM_REG_ARM_CORE for accessing the FPSIMD V-registers on
> +  * SVE-enabled vcpus:
> +  */
> + if (vcpu_has_sve(vcpu) && core_reg_offset_is_vreg(off))
> + return -EINVAL;
> +
> + return 0;
>  }
>  
>  static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
> @@ -117,7 +132,7 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const 
> struct kvm_one_reg *reg)
>   (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
>   return -ENOENT;
>  
> - if (validate_core_offset(reg))
> + if (validate_core_offset(vcpu, reg))
>   return -EINVAL;
>  
>   if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id)))
> @@ -142,7 +157,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const 
> struct kvm_one_reg *reg)
>   (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
>   return -ENOENT;
>  
> - if (validate_core_offset(reg))
> + if (validate_core_offset(vcpu, reg))
>   return -EINVAL;
>  
>   if (KVM_REG_SIZE(reg->id) > sizeof(tmp))
> @@ -195,13 +210,22 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, 
> struct kvm_regs *regs)
>   return -EINVAL;
>  }
>  
> -static int kvm_arm_copy_core_reg_indices(u64 __user *uindices)
> +static int copy_core_reg_indices(const struct kvm_vcpu *vcpu,
> +  u64 __user *uindices)
>  {
>   unsigned int i;
>   int n = 0;
>   const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | 
> KVM_REG_ARM_CORE;
>  
>   for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
> + /*
> +  * The KVM_REG_ARM64_SVE regs must be used instead of
> +  * KVM_REG_ARM_CORE for accessing the FPSIMD V-registers on
> +  * SVE-enabled vcpus:
> +  */
> + if (vcpu_has_sve(vcpu) && core_reg_offset_is_vreg(i))
> + continue;
> +
>   if (uindices) {
>   if (put_user(core_reg | i, uindices))
>

Re: [PATCH v6 16/27] KVM: arm64: Factor out core register ID enumeration

2019-03-27 Thread Julien Thierry
Hi Dave,

On 19/03/2019 17:52, Dave Martin wrote:
> In preparation for adding logic to filter out some KVM_REG_ARM_CORE
> registers from the KVM_GET_REG_LIST output, this patch factors out
> the core register enumeration into a separate function and rebuilds
> num_core_regs() on top of it.
> 
> This may be a little more expensive (depending on how good a job
> the compiler does of specialising the code), but KVM_GET_REG_LIST
> is not a hot path.
> 
> This will make it easier to consolidate ID filtering code in one
> place.
> 
> No functional change.
> 
> Signed-off-by: Dave Martin 
> 

Reviewed-by: Julien Thierry 

Cheers,

Julien

> ---
> 
> Changes since v5:
> 
>  * New patch.
> 
>This reimplements part of the separately-posted patch "KVM: arm64:
>Factor out KVM_GET_REG_LIST core register enumeration", minus aspects
>that potentially break the ABI.
> 
>As a result, the opportunity to truly consolidate all the ID reg
>filtering in one place is deliberately left on the floor, for now.
>This will be addressed in a separate series later on.
> ---
>  arch/arm64/kvm/guest.c | 33 +
>  1 file changed, 25 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
> index 3e38eb2..a391a61 100644
> --- a/arch/arm64/kvm/guest.c
> +++ b/arch/arm64/kvm/guest.c
> @@ -23,6 +23,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -194,9 +195,28 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, 
> struct kvm_regs *regs)
>   return -EINVAL;
>  }
>  
> +static int kvm_arm_copy_core_reg_indices(u64 __user *uindices)
> +{
> + unsigned int i;
> + int n = 0;
> + const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | 
> KVM_REG_ARM_CORE;
> +
> + for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
> + if (uindices) {
> + if (put_user(core_reg | i, uindices))
> + return -EFAULT;
> + uindices++;
> + }
> +
> + n++;
> + }
> +
> + return n;
> +}
> +
>  static unsigned long num_core_regs(void)
>  {
> - return sizeof(struct kvm_regs) / sizeof(__u32);
> + return kvm_arm_copy_core_reg_indices(NULL);
>  }
>  
>  /**
> @@ -276,15 +296,12 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
>   */
>  int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
>  {
> - unsigned int i;
> - const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | 
> KVM_REG_ARM_CORE;
>   int ret;
>  
> - for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
> - if (put_user(core_reg | i, uindices))
> -         return -EFAULT;
> - uindices++;
> - }
> + ret = kvm_arm_copy_core_reg_indices(uindices);
> + if (ret)
> + return ret;
> + uindices += ret;
>  
>   ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices);
>   if (ret)
> 

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v6 13/27] KVM: arm64/sve: Context switch the SVE registers

2019-03-27 Thread Julien Thierry



On 19/03/2019 17:52, Dave Martin wrote:
> In order to give each vcpu its own view of the SVE registers, this
> patch adds context storage via a new sve_state pointer in struct
> vcpu_arch.  An additional member sve_max_vl is also added for each
> vcpu, to determine the maximum vector length visible to the guest
> and thus the value to be configured in ZCR_EL2.LEN while the vcpu
> is active.  This also determines the layout and size of the storage
> in sve_state, which is read and written by the same backend
> functions that are used for context-switching the SVE state for
> host tasks.
> 
> On SVE-enabled vcpus, SVE access traps are now handled by switching
> in the vcpu's SVE context and disabling the trap before returning
> to the guest.  On other vcpus, the trap is not handled and an exit
> back to the host occurs, where the handle_sve() fallback path
> reflects an undefined instruction exception back to the guest,
> consistently with the behaviour of non-SVE-capable hardware (as was
> done unconditionally prior to this patch).
> 
> No SVE handling is added on non-VHE-only paths, since VHE is an
> architectural and Kconfig prerequisite of SVE.
> 
> Signed-off-by: Dave Martin 
> 

Reviewed-by: Julien Thierry 

> ---
> 
> Changes since v5:
> 
>  * [Julien Thierry, Julien Grall] Commit message typo fixes
> 
>  * [Mark Rutland] Rename trap_class to hsr_ec, for consistency with
>existing code.
> 
>  * [Mark Rutland] Simplify condition for refusing to handle an
>FPSIMD/SVE trap, using multiple if () statements for clarity.  The
>previous condition was a bit tortuous, and now that the static_key
>checks have been hoisted out, it makes little difference to the
>compiler how we express the condition here.
> ---
>  arch/arm64/include/asm/kvm_host.h |  6 
>  arch/arm64/kvm/fpsimd.c   |  5 +--
>  arch/arm64/kvm/hyp/switch.c   | 75 
> +--
>  3 files changed, 66 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 22cf484..4fabfd2 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -228,6 +228,8 @@ struct vcpu_reset_state {
>  
>  struct kvm_vcpu_arch {
>   struct kvm_cpu_context ctxt;
> + void *sve_state;
> + unsigned int sve_max_vl;
>  
>   /* HYP configuration */
>   u64 hcr_el2;
> @@ -323,6 +325,10 @@ struct kvm_vcpu_arch {
>   bool sysregs_loaded_on_cpu;
>  };
>  
> +/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
> +#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
> +   sve_ffr_offset((vcpu)->arch.sve_max_vl)))
> +
>  /* vcpu_arch flags field values: */
>  #define KVM_ARM64_DEBUG_DIRTY(1 << 0)
>  #define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */
> diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
> index 7053bf4..6e3c9c8 100644
> --- a/arch/arm64/kvm/fpsimd.c
> +++ b/arch/arm64/kvm/fpsimd.c
> @@ -87,10 +87,11 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
>  
>   if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
>   fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs,
> -  NULL, SVE_VL_MIN);
> +  vcpu->arch.sve_state,
> +  vcpu->arch.sve_max_vl);
>  
>   clear_thread_flag(TIF_FOREIGN_FPSTATE);
> - clear_thread_flag(TIF_SVE);
> + update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));
>   }
>  }
>  
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 9d46066..5444b9c 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -100,7 +100,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu)
>   val = read_sysreg(cpacr_el1);
>   val |= CPACR_EL1_TTA;
>   val &= ~CPACR_EL1_ZEN;
> - if (!update_fp_enabled(vcpu)) {
> + if (update_fp_enabled(vcpu)) {
> + if (vcpu_has_sve(vcpu))
> + val |= CPACR_EL1_ZEN;
> + } else {
>   val &= ~CPACR_EL1_FPEN;
>   __activate_traps_fpsimd32(vcpu);
>   }
> @@ -317,16 +320,48 @@ static bool __hyp_text __populate_fault_info(struct 
> kvm_vcpu *vcpu)
>   return true;
>  }
>  
> -static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
> +/* Check for an FPSIMD/SVE trap and handle as appropriate */
> +static bool __hyp_text __hyp_handle_fpsimd(struct 

Re: [PATCH v6 12/27] KVM: arm64/sve: System register context switch and access support

2019-03-26 Thread Julien Thierry
Hi Dave,

On 19/03/2019 17:52, Dave Martin wrote:
> This patch adds the necessary support for context switching ZCR_EL1
> for each vcpu.
> 
> ZCR_EL1 is trapped alongside the FPSIMD/SVE registers, so it makes
> sense for it to be handled as part of the guest FPSIMD/SVE context
> for context switch purposes instead of handling it as a general
> system register.  This means that it can be switched in lazily at
> the appropriate time.  No effort is made to track host context for
> this register, since SVE requires VHE: thus the hosts's value for
> this register lives permanently in ZCR_EL2 and does not alias the
> guest's value at any time.
> 
> The Hyp switch and fpsimd context handling code is extended
> appropriately.
> 
> Accessors are added in sys_regs.c to expose the SVE system
> registers and ID register fields.  Because these need to be
> conditionally visible based on the guest configuration, they are
> implemented separately for now rather than by use of the generic
> system register helpers.  This may be abstracted better later on
> when/if there are more features requiring this model.
> 
> ID_AA64ZFR0_EL1 is RO-RAZ for MRS/MSR when SVE is disabled for the
> guest, but for compatibility with non-SVE aware KVM implementations
> the register should not be enumerated at all for KVM_GET_REG_LIST
> in this case.  For consistency we also reject ioctl access to the
> register.  This ensures that a non-SVE-enabled guest looks the same
> to userspace, irrespective of whether the kernel KVM implementation
> supports SVE.
> 
> Signed-off-by: Dave Martin 

Reviewed-by: Julien Thierry 

Cheers,

Julien

> 
> ---
> 
> Changes since v5:
> 
>  * Port to the renamed visibility() framework.
> 
>  * Swap visiblity() helpers so that they appear by the relevant accessor
>functions.
> 
>  * [Julien Grall] With the visibility() checks, {get,set}_zcr_el1()
>degenerate to doing exactly what the common code does, so drop them.
> 
>The ID_AA64ZFR0_EL1 handlers are still needed to provide contitional
>RAZ behaviour.  This could be moved to the common code too, but since
>this is a one-off case I don't do this for now.  We can address this
>later if other regs need to follow the same pattern.
> 
>  * [Julien Thierry] Reset ZCR_EL1 to a fixed value using reset_val
>instead of using relying on reset_unknown() honouring set bits in val
>as RES0.
> 
>Most of the bits in ZCR_EL1 are RES0 anyway, and many implementations
>of SVE will support larger vectors than 128 bits, so 0 seems as good
>a value as any to expose guests that forget to initialise this
>register properly.
> ---
>  arch/arm64/include/asm/kvm_host.h |  1 +
>  arch/arm64/include/asm/sysreg.h   |  3 ++
>  arch/arm64/kvm/fpsimd.c   |  9 -
>  arch/arm64/kvm/hyp/switch.c   |  3 ++
>  arch/arm64/kvm/sys_regs.c | 83 
> ---
>  5 files changed, 93 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index ad4f7f0..22cf484 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -121,6 +121,7 @@ enum vcpu_sysreg {
>   SCTLR_EL1,  /* System Control Register */
>   ACTLR_EL1,  /* Auxiliary Control Register */
>   CPACR_EL1,  /* Coprocessor Access Control */
> + ZCR_EL1,/* SVE Control */
>   TTBR0_EL1,  /* Translation Table Base Register 0 */
>   TTBR1_EL1,  /* Translation Table Base Register 1 */
>   TCR_EL1,/* Translation Control Register */
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 5b267de..4d6262d 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -454,6 +454,9 @@
>  #define SYS_ICH_LR14_EL2 __SYS__LR8_EL2(6)
>  #define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7)
>  
> +/* VHE encodings for architectural EL0/1 system registers */
> +#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0)
> +
>  /* Common SCTLR_ELx flags. */
>  #define SCTLR_ELx_DSSBS  (_BITUL(44))
>  #define SCTLR_ELx_ENIA   (_BITUL(31))
> diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
> index 1cf4f02..7053bf4 100644
> --- a/arch/arm64/kvm/fpsimd.c
> +++ b/arch/arm64/kvm/fpsimd.c
> @@ -103,14 +103,21 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
>  void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
>  {
>   unsigned long flags;
> + bool host_has_sve = system_supports_sve();
> + bool guest_has_sve = vcpu_has_sve(vcpu);
>  
>   local_irq_save(flags);

[PATCH v2 7/9] arm/arm64: kvm: pmu: Make overflow handler NMI safe

2019-03-22 Thread Julien Thierry
When using an NMI for the PMU interrupt, taking any lock might cause a
deadlock. The current PMU overflow handler in KVM takes locks when
trying to wake up a vcpu.

When the overflow handler is called by an NMI, defer the vcpu wake-up to
an irq_work queue.

Signed-off-by: Julien Thierry 
Cc: Christoffer Dall 
Cc: Marc Zyngier 
Cc: Will Deacon 
Cc: Mark Rutland 
Cc: James Morse 
Cc: Suzuki K Pouloze 
Cc: kvmarm@lists.cs.columbia.edu
---
 include/kvm/arm_pmu.h |  1 +
 virt/kvm/arm/pmu.c| 37 +
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index f87fe20..6a7c9dd 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -37,6 +37,7 @@ struct kvm_pmu {
bool ready;
bool created;
bool irq_level;
+   struct irq_work overflow_work;
 };

 #define kvm_arm_pmu_v3_ready(v)((v)->arch.pmu.ready)
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 1c5b76c..a72c972 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -273,15 +273,37 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
kvm_pmu_update_state(vcpu);
 }

+static inline struct kvm_vcpu *kvm_pmu_to_vcpu(struct kvm_pmu *pmu)
+{
+   struct kvm_vcpu_arch *vcpu_arch;
+
+   vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
+   return container_of(vcpu_arch, struct kvm_vcpu, arch);
+}
+
 static inline struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
 {
struct kvm_pmu *pmu;
-   struct kvm_vcpu_arch *vcpu_arch;

pmc -= pmc->idx;
pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
-   vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
-   return container_of(vcpu_arch, struct kvm_vcpu, arch);
+   return kvm_pmu_to_vcpu(pmu);
+}
+
+/**
+ * When the perf interrupt is an NMI, we cannot safely notify the vcpu
+ * corresponding to the event.
+ * This is why we need a callback to do it once outside of the NMI context.
+ */
+static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
+{
+   struct kvm_vcpu *vcpu;
+   struct kvm_pmu *pmu;
+
+   pmu = container_of(work, struct kvm_pmu, overflow_work);
+   vcpu = kvm_pmu_to_vcpu(pmu);
+
+   kvm_vcpu_kick(vcpu);
 }

 /**
@@ -299,7 +321,11 @@ static void kvm_pmu_perf_overflow(struct perf_event 
*perf_event,

if (kvm_pmu_overflow_status(vcpu)) {
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
-   kvm_vcpu_kick(vcpu);
+
+   if (!in_nmi())
+   kvm_vcpu_kick(vcpu);
+   else
+   irq_work_queue(&vcpu->arch.pmu.overflow_work);
}
 }

@@ -501,6 +527,9 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
return ret;
}

+   init_irq_work(&vcpu->arch.pmu.overflow_work,
+ kvm_pmu_perf_overflow_notify_vcpu);
+
vcpu->arch.pmu.created = true;
return 0;
 }
--
1.9.1
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v7 7/10] KVM: arm/arm64: context-switch ptrauth registers

2019-03-21 Thread Julien Thierry


On 21/03/2019 06:08, Amit Daniel Kachhap wrote:
> Hi Julien,
> 
> On 3/20/19 5:43 PM, Julien Thierry wrote:
>> Hi Amit,
>>
>> On 19/03/2019 08:30, Amit Daniel Kachhap wrote:
>>> From: Mark Rutland 
>>>
>>> When pointer authentication is supported, a guest may wish to use it.
>>> This patch adds the necessary KVM infrastructure for this to work, with
>>> a semi-lazy context switch of the pointer auth state.
>>>
>>> Pointer authentication feature is only enabled when VHE is built
>>> in the kernel and present in the CPU implementation so only VHE code
>>> paths are modified.
>>>
>>> When we schedule a vcpu, we disable guest usage of pointer
>>> authentication instructions and accesses to the keys. While these are
>>> disabled, we avoid context-switching the keys. When we trap the guest
>>> trying to use pointer authentication functionality, we change to eagerly
>>> context-switching the keys, and enable the feature. The next time the
>>> vcpu is scheduled out/in, we start again. However the host key save is
>>> optimized and implemented inside ptrauth instruction/register access
>>> trap.
>>>
>>> Pointer authentication consists of address authentication and generic
>>> authentication, and CPUs in a system might have varied support for
>>> either. Where support for either feature is not uniform, it is hidden
>>> from guests via ID register emulation, as a result of the cpufeature
>>> framework in the host.
>>>
>>> Unfortunately, address authentication and generic authentication cannot
>>> be trapped separately, as the architecture provides a single EL2 trap
>>> covering both. If we wish to expose one without the other, we cannot
>>> prevent a (badly-written) guest from intermittently using a feature
>>> which is not uniformly supported (when scheduled on a physical CPU which
>>> supports the relevant feature). Hence, this patch expects both type of
>>> authentication to be present in a cpu.
>>>
>>> This switch of key is done from guest enter/exit assembly as preparation
>>> for the upcoming in-kernel pointer authentication support. Hence, these
>>> key switching routines are not implemented in C code as they may cause
>>> pointer authentication key signing error in some situations.
>>>
>>> Signed-off-by: Mark Rutland 
>>> [Only VHE, key switch in full assembly, vcpu_has_ptrauth checks
>>> , save host key in ptrauth exception trap]
>>> Signed-off-by: Amit Daniel Kachhap 
>>> Reviewed-by: Julien Thierry 
>>> Cc: Marc Zyngier 
>>> Cc: Christoffer Dall 
>>> Cc: kvmarm@lists.cs.columbia.edu
>>> ---
>>>   arch/arm64/include/asm/kvm_host.h    |  17 ++
>>>   arch/arm64/include/asm/kvm_ptrauth_asm.h | 100
>>> +++
>>>   arch/arm64/kernel/asm-offsets.c  |   6 ++
>>>   arch/arm64/kvm/guest.c   |  14 +
>>>   arch/arm64/kvm/handle_exit.c |  24 +---
>>>   arch/arm64/kvm/hyp/entry.S   |   7 +++
>>>   arch/arm64/kvm/reset.c   |   7 +++
>>>   arch/arm64/kvm/sys_regs.c    |  46 +-
>>>   virt/kvm/arm/arm.c   |   2 +
>>>   9 files changed, 212 insertions(+), 11 deletions(-)
>>>   create mode 100644 arch/arm64/include/asm/kvm_ptrauth_asm.h
>>>
> [...]
>>> +#ifdef    CONFIG_ARM64_PTR_AUTH
>>> +
>>> +#define PTRAUTH_REG_OFFSET(x)    (x - CPU_APIAKEYLO_EL1)
>>
>> I don't really see the point of this macro. You move the pointers of
>> kvm_cpu_contexts to point to where the ptr auth registers are (which is
>> in the middle of an array) by adding the offset of APIAKEYLO and then we
>> have to recompute all offsets with this macro.
>>
>> Why not just pass the kvm_cpu_context pointers to
>> ptrauth_save/restore_state and use the already defined offsets
>> (CPU_AP*_EL1) directly?
>>
>> I think this would also allow to use one less register for the
>> ptrauth_switch_to_* macros.
> Actually the values of CPU_AP*_EL1 are exceeding the immediate range
> (i.e 512), so this was done to keep the immediate offset within the range.
> The other way would have been to calculate the destination register but
> these would add one more add instruction everywhere.
> I should have mentioned them as comments somewhere.

Oh, I see. Yes, it would definitely be worth a comment.
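
Something along these lines would do (sketch only, reusing the
PTRAUTH_REG_OFFSET macro from the patch; x11 is just an example
register):

	/*
	 * The ldp/stp immediate is a signed, scaled 7-bit field, so it only
	 * reaches about +/-512 bytes, while the CPU_AP*KEY*_EL1 offsets sit
	 * deeper inside struct kvm_cpu_context.  Rebase the context pointer
	 * on the first key so that the per-key offsets stay in range:
	 *
	 *	add	x11, x11, #CPU_APIAKEYLO_EL1
	 *	ldp	x1, x2, [x11, #PTRAUTH_REG_OFFSET(CPU_APIBKEYLO_EL1)]
	 */
	#define PTRAUTH_REG_OFFSET(x)	(x - CPU_APIAKEYLO_EL1)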

Thanks,

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v7 9/10] KVM: arm64: docs: document KVM support of pointer authentication

2019-03-20 Thread Julien Thierry



On 20/03/2019 15:04, Kristina Martsenko wrote:
> On 20/03/2019 13:37, Julien Thierry wrote:
>> Hi Amit,
>>
>> On 19/03/2019 08:30, Amit Daniel Kachhap wrote:
>>> This adds sections for KVM API extension for pointer authentication.
>>> A brief description about usage of pointer authentication for KVM guests
>>> is added in the arm64 documentations.
> 
> [...]
> 
>>> diff --git a/Documentation/virtual/kvm/api.txt 
>>> b/Documentation/virtual/kvm/api.txt
>>> index 7de9eee..b5c66bc 100644
>>> --- a/Documentation/virtual/kvm/api.txt
>>> +++ b/Documentation/virtual/kvm/api.txt
>>> @@ -2659,6 +2659,12 @@ Possible features:
>>>   Depends on KVM_CAP_ARM_PSCI_0_2.
>>> - KVM_ARM_VCPU_PMU_V3: Emulate PMUv3 for the CPU.
>>>   Depends on KVM_CAP_ARM_PMU_V3.
>>> +   - KVM_ARM_VCPU_PTRAUTH_ADDRESS:
>>> +   - KVM_ARM_VCPU_PTRAUTH_GENERIC:
>>> + Enables Pointer authentication for the CPU.
>>> + Depends on KVM_CAP_ARM_PTRAUTH and only on arm64 architecture. If
>>> + set, then the KVM guest allows the execution of pointer authentication
>>> + instructions. Otherwise, KVM treats these instructions as undefined.
>>>  
>>
>> Overall I feel one could easily get confused to whether
>> PTRAUTH_ADDRESS/GENERIC are two individual features, whether one is a
>> superset of the other, if the names are just an alias of one another, etc...
>>
>> I think the doc should at least stress out that *both* flags are
>> required to enable ptrauth in a guest. However it raises the question,
>> if we don't plan to support the features individually (because we
>> can't), should we really expose two feature flags? I seems odd to
>> introduce two flags that only do something if used together...
> 
> Why can't we support the features individually? For example, if we ever
> get a system where all CPUs support address authentication and none of
> them support generic authentication, then we could still support address
> authentication in the guest.
> 
> 

That's a good point, I didn't think of that.

Although, currently we don't have a way to detect that we are in such a
configuration. So as is, both flags are required to enable either
feature, and I feel the documentation should be clear on that aspect.

Another option would be to introduce a flag that enables both for now,
and if one day we decide to support the configuration you mentioned we
could add "more modular" flags that allow you to control those features
individually. While a bit cumbersome, I would find that less awkward
than having two flags that only do something if both are present.

Thanks,

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v7 9/10] KVM: arm64: docs: document KVM support of pointer authentication

2019-03-20 Thread Julien Thierry
Hi Amit,

On 19/03/2019 08:30, Amit Daniel Kachhap wrote:
> This adds sections for KVM API extension for pointer authentication.
> A brief description about usage of pointer authentication for KVM guests
> is added in the arm64 documentations.
> 
> Signed-off-by: Amit Daniel Kachhap 
> Cc: Mark Rutland 
> Cc: Christoffer Dall 
> Cc: Marc Zyngier 
> Cc: kvmarm@lists.cs.columbia.edu
> ---
>  Documentation/arm64/pointer-authentication.txt | 15 +++
>  Documentation/virtual/kvm/api.txt  |  6 ++
>  2 files changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/arm64/pointer-authentication.txt 
> b/Documentation/arm64/pointer-authentication.txt
> index 5baca42..4b769e6 100644
> --- a/Documentation/arm64/pointer-authentication.txt
> +++ b/Documentation/arm64/pointer-authentication.txt
> @@ -87,7 +87,14 @@ used to get and set the keys for a thread.
>  Virtualization
>  --
>  
> -Pointer authentication is not currently supported in KVM guests. KVM
> -will mask the feature bits from ID_AA64ISAR1_EL1, and attempted use of
> -the feature will result in an UNDEFINED exception being injected into
> -the guest.
> +Pointer authentication is enabled in KVM guest when each virtual cpu is
> +initialised by passing flags KVM_ARM_VCPU_PTRAUTH_[ADDRESS/GENERIC] and
> +requesting this feature to be enabled. Without this flag, pointer

"Without these flags"*

> +authentication is not enabled in KVM guests and attempted use of the
> +feature will result in an UNDEFINED exception being injected into the
> +guest.
> +
> +Additionally, when these vcpu feature flags are not set then KVM will
> +filter out the Pointer Authentication system key registers from
> +KVM_GET/SET_REG_* ioctls and mask those features from cpufeature ID
> +register.
> diff --git a/Documentation/virtual/kvm/api.txt 
> b/Documentation/virtual/kvm/api.txt
> index 7de9eee..b5c66bc 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -2659,6 +2659,12 @@ Possible features:
> Depends on KVM_CAP_ARM_PSCI_0_2.
>   - KVM_ARM_VCPU_PMU_V3: Emulate PMUv3 for the CPU.
> Depends on KVM_CAP_ARM_PMU_V3.
> + - KVM_ARM_VCPU_PTRAUTH_ADDRESS:
> + - KVM_ARM_VCPU_PTRAUTH_GENERIC:
> +   Enables Pointer authentication for the CPU.
> +   Depends on KVM_CAP_ARM_PTRAUTH and only on arm64 architecture. If
> +   set, then the KVM guest allows the execution of pointer authentication
> +   instructions. Otherwise, KVM treats these instructions as undefined.
>  

Overall I feel one could easily get confused to whether
PTRAUTH_ADDRESS/GENERIC are two individual features, whether one is a
superset of the other, if the names are just an alias of one another, etc...

I think the doc should at least stress out that *both* flags are
required to enable ptrauth in a guest. However it raises the question,
if we don't plan to support the features individually (because we
can't), should we really expose two feature flags? I seems odd to
introduce two flags that only do something if used together...

Cheers,

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v7 7/10] KVM: arm/arm64: context-switch ptrauth registers

2019-03-20 Thread Julien Thierry
Hi Amit,

On 19/03/2019 08:30, Amit Daniel Kachhap wrote:
> From: Mark Rutland 
> 
> When pointer authentication is supported, a guest may wish to use it.
> This patch adds the necessary KVM infrastructure for this to work, with
> a semi-lazy context switch of the pointer auth state.
> 
> Pointer authentication feature is only enabled when VHE is built
> in the kernel and present in the CPU implementation so only VHE code
> paths are modified.
> 
> When we schedule a vcpu, we disable guest usage of pointer
> authentication instructions and accesses to the keys. While these are
> disabled, we avoid context-switching the keys. When we trap the guest
> trying to use pointer authentication functionality, we change to eagerly
> context-switching the keys, and enable the feature. The next time the
> vcpu is scheduled out/in, we start again. However the host key save is
> optimized and implemented inside ptrauth instruction/register access
> trap.
> 
> Pointer authentication consists of address authentication and generic
> authentication, and CPUs in a system might have varied support for
> either. Where support for either feature is not uniform, it is hidden
> from guests via ID register emulation, as a result of the cpufeature
> framework in the host.
> 
> Unfortunately, address authentication and generic authentication cannot
> be trapped separately, as the architecture provides a single EL2 trap
> covering both. If we wish to expose one without the other, we cannot
> prevent a (badly-written) guest from intermittently using a feature
> which is not uniformly supported (when scheduled on a physical CPU which
> supports the relevant feature). Hence, this patch expects both type of
> authentication to be present in a cpu.
> 
> This switch of key is done from guest enter/exit assembly as preparation
> for the upcoming in-kernel pointer authentication support. Hence, these
> key switching routines are not implemented in C code as they may cause
> pointer authentication key signing error in some situations.
> 
> Signed-off-by: Mark Rutland 
> [Only VHE, key switch in full assembly, vcpu_has_ptrauth checks
> , save host key in ptrauth exception trap]
> Signed-off-by: Amit Daniel Kachhap 
> Reviewed-by: Julien Thierry 
> Cc: Marc Zyngier 
> Cc: Christoffer Dall 
> Cc: kvmarm@lists.cs.columbia.edu
> ---
>  arch/arm64/include/asm/kvm_host.h|  17 ++
>  arch/arm64/include/asm/kvm_ptrauth_asm.h | 100 
> +++
>  arch/arm64/kernel/asm-offsets.c  |   6 ++
>  arch/arm64/kvm/guest.c   |  14 +
>  arch/arm64/kvm/handle_exit.c |  24 +---
>  arch/arm64/kvm/hyp/entry.S   |   7 +++
>  arch/arm64/kvm/reset.c   |   7 +++
>  arch/arm64/kvm/sys_regs.c|  46 +-
>  virt/kvm/arm/arm.c   |   2 +
>  9 files changed, 212 insertions(+), 11 deletions(-)
>  create mode 100644 arch/arm64/include/asm/kvm_ptrauth_asm.h
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 9dd2918..61239a6 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -152,6 +152,18 @@ enum vcpu_sysreg {
>   PMSWINC_EL0,/* Software Increment Register */
>   PMUSERENR_EL0,  /* User Enable Register */
>  
> + /* Pointer Authentication Registers */
> + APIAKEYLO_EL1,
> + APIAKEYHI_EL1,
> + APIBKEYLO_EL1,
> + APIBKEYHI_EL1,
> + APDAKEYLO_EL1,
> + APDAKEYHI_EL1,
> + APDBKEYLO_EL1,
> + APDBKEYHI_EL1,
> + APGAKEYLO_EL1,
> + APGAKEYHI_EL1,
> +
>   /* 32bit specific registers. Keep them at the end of the range */
>   DACR32_EL2, /* Domain Access Control Register */
>   IFSR32_EL2, /* Instruction Fault Status Register */
> @@ -497,6 +509,11 @@ static inline bool kvm_arch_requires_vhe(void)
>   test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) && \
>   test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features))
>  
> +void kvm_arm_vcpu_ptrauth_enable(struct kvm_vcpu *vcpu);
> +void kvm_arm_vcpu_ptrauth_disable(struct kvm_vcpu *vcpu);
> +void kvm_arm_vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu);
> +void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
> +
>  static inline void kvm_arch_hardware_unsetup(void) {}
>  static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
> diff --git a/arch/arm64/include/asm/kvm_ptrauth_asm.h 
> b/arch/arm64/include/asm/kvm_ptrauth_asm.h
> new file mode 100644
> index 000..97bb040
> --- /dev/null
> +++ b/a

Re: [PATCH v7 3/10] KVM: arm64: Move hyp_symbol_addr to fix dependency

2019-03-20 Thread Julien Thierry
Hi Amit,

On 19/03/2019 08:30, Amit Daniel Kachhap wrote:
> Currently hyp_symbol_addr is placed in kvm_mmu.h which is mostly
> used by __hyp_this_cpu_ptr in kvm_asm.h but it cannot include
> kvm_mmu.h directly as kvm_mmu.h uses kvm_ksym_ref which is
> defined inside kvm_asm.h. Hence, hyp_symbol_addr is moved inside
> kvm_asm.h to fix this dependency on each other.
> 
> Also kvm_ksym_ref is corresponding counterpart of hyp_symbol_addr
> so should be ideally placed inside kvm_asm.h.
> 

This part is a bit confusing, it led me to think that kvm_ksym_ref was
in kvm_mmu.h and should be moved to kvm_asm.h as well. I'd suggest
rephrasing it with something along these lines:

"Also, hyp_symbol_addr's counterpart, kvm_ksym_ref, is already in
kvm_asm.h, making it more sensible to move hyp_symbol_addr to the same
file."

Otherwise:

Reviewed-by: Julien Thierry 

Cheers,

Julien

> Suggested by: James Morse 
> Signed-off-by: Amit Daniel Kachhap 
> Cc: Marc Zyngier 
> Cc: Christoffer Dall 
> Cc: kvmarm@lists.cs.columbia.edu
> ---
>  arch/arm64/include/asm/kvm_asm.h | 20 
>  arch/arm64/include/asm/kvm_mmu.h | 20 
>  2 files changed, 20 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_asm.h 
> b/arch/arm64/include/asm/kvm_asm.h
> index f5b79e9..57a07e8 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -80,6 +80,26 @@ extern void __vgic_v3_init_lrs(void);
>  
>  extern u32 __kvm_get_mdcr_el2(void);
>  
> +/*
> + * Obtain the PC-relative address of a kernel symbol
> + * s: symbol
> + *
> + * The goal of this macro is to return a symbol's address based on a
> + * PC-relative computation, as opposed to a loading the VA from a
> + * constant pool or something similar. This works well for HYP, as an
> + * absolute VA is guaranteed to be wrong. Only use this if trying to
> + * obtain the address of a symbol (i.e. not something you obtained by
> + * following a pointer).
> + */
> +#define hyp_symbol_addr(s)   \
> + ({  \
> + typeof(s) *addr;\
> + asm("adrp   %0, %1\n"   \
> + "add   %0, %0, :lo12:%1\n" \
> + : "=r" (addr) : "S" (&s));  \
> + addr;   \
> + })
> +
>  /* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */
>  #define __hyp_this_cpu_ptr(sym)  
> \
>   ({  \
> diff --git a/arch/arm64/include/asm/kvm_mmu.h 
> b/arch/arm64/include/asm/kvm_mmu.h
> index b0742a1..3dea6af 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -118,26 +118,6 @@ static inline unsigned long __kern_hyp_va(unsigned long 
> v)
>  #define kern_hyp_va(v)   ((typeof(v))(__kern_hyp_va((unsigned long)(v
>  
>  /*
> - * Obtain the PC-relative address of a kernel symbol
> - * s: symbol
> - *
> - * The goal of this macro is to return a symbol's address based on a
> - * PC-relative computation, as opposed to a loading the VA from a
> - * constant pool or something similar. This works well for HYP, as an
> - * absolute VA is guaranteed to be wrong. Only use this if trying to
> - * obtain the address of a symbol (i.e. not something you obtained by
> - * following a pointer).
> - */
> -#define hyp_symbol_addr(s)   \
> - ({  \
> - typeof(s) *addr;\
> - asm("adrp   %0, %1\n"   \
> - "add   %0, %0, :lo12:%1\n" \
> -     : "=r" (addr) : "S" (&s));  \
> - addr;   \
> - })
> -
> -/*
>   * We currently support using a VM-specified IPA size. For backward
>   * compatibility, the default IPA size is fixed to 40bits.
>   */
> 

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v11 6/8] arm64: KVM: Enable VHE support for :G/:H perf event modifiers

2019-03-11 Thread Julien Thierry
>  return (attr->exclude_host ^ attr->exclude_guest);
>  }
> @@ -87,3 +95,78 @@ void __hyp_text __pmu_switch_to_host(struct 
> kvm_cpu_context *host_ctxt)
>   write_sysreg(pmu->events_host, pmcntenset_el0);
>  }
>  
> +/*
> + * Modify ARMv8 PMU events to include EL0 counting
> + */
> +static void kvm_vcpu_pmu_enable_el0(unsigned long events)
> +{
> + u64 typer;
> + u32 counter;
> +
> + for_each_set_bit(counter, &events, 32) {
> + write_sysreg(counter, pmselr_el0);
> + isb();
> + typer = read_sysreg(pmxevtyper_el0) & ~ARMV8_PMU_EXCLUDE_EL0;
> + write_sysreg(typer, pmxevtyper_el0);
> + isb();
> + }
> +}
> +
> +/*
> + * Modify ARMv8 PMU events to exclude EL0 counting
> + */
> +static void kvm_vcpu_pmu_disable_el0(unsigned long events)
> +{
> + u64 typer;
> + u32 counter;
> +
> + for_each_set_bit(counter, &events, 32) {
> + write_sysreg(counter, pmselr_el0);
> + isb();
> + typer = read_sysreg(pmxevtyper_el0) | ARMV8_PMU_EXCLUDE_EL0;
> + write_sysreg(typer, pmxevtyper_el0);
> + isb();
> + }
> +}
> +
> +/*
> + * On VHE ensure that only guest events have EL0 counting enabled
> + */
> +void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_cpu_context *host_ctxt;
> + struct kvm_host_data *host;
> + u32 events_guest, events_host;
> +
> + if (!has_vhe())
> + return;
> +
> + host_ctxt = vcpu->arch.host_cpu_context;
> + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
> + events_guest = host->pmu_events.events_guest;
> + events_host = host->pmu_events.events_host;
> +
> + kvm_vcpu_pmu_enable_el0(events_guest);
> + kvm_vcpu_pmu_disable_el0(events_host);

So, we load a vcpu, and all events common to the guest and the host
(events_guest & events_host) get the EXCLUDE_EL0 flag set.

I don't see anything that will remove that flag before running the
guest. Am I missing something? Should these lines be as follows?

kvm_vcpu_pmu_enable_el0(events_guest & events_host);
kvm_vcpu_pmu_enable_el0(events_guest ^ events_host);

> +}
> +
> +/*
> + * On VHE ensure that only guest host have EL0 counting enabled
> + */
> +void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_cpu_context *host_ctxt;
> + struct kvm_host_data *host;
> + u32 events_guest, events_host;
> +
> + if (!has_vhe())
> + return;
> +
> + host_ctxt = vcpu->arch.host_cpu_context;
> + host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
> + events_guest = host->pmu_events.events_guest;
> + events_host = host->pmu_events.events_host;
> +
> + kvm_vcpu_pmu_enable_el0(events_host);
> + kvm_vcpu_pmu_disable_el0(events_guest);

Same question as above, after vcpu_put, it seems we've disabled at EL0
host events that are common to the guest and the host.

Thanks,

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v11 3/8] arm64: KVM: add accessors to track guest/host only counters

2019-03-08 Thread Julien Thierry
Hi Andrew,

On 08/03/2019 12:07, Andrew Murray wrote:
> In order to efficiently switch events_{guest,host} perf counters at
> guest entry/exit we add bitfields to kvm_cpu_context for guest and host
> events as well as accessors for updating them.
> 
> A function is also provided which allows the PMU driver to determine
> if a counter should start counting when it is enabled. With exclude_host,
> events on !VHE we may only start counting when entering the guest.
> 

I might have missed something here. Why is that true only for !VHE? Is
it because with VHE we can just exclude EL1?
(It's also a bit confusing since the patch does not seem to contain any
VHE/nVHE distinction)

> Signed-off-by: Andrew Murray 
> ---
>  arch/arm64/include/asm/kvm_host.h | 17 +++
>  arch/arm64/kvm/Makefile   |  2 +-
>  arch/arm64/kvm/pmu.c  | 49 +++
>  3 files changed, 67 insertions(+), 1 deletion(-)
>  create mode 100644 arch/arm64/kvm/pmu.c
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 1d36619d6650..4b7219128f2d 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -207,8 +207,14 @@ struct kvm_cpu_context {
>   struct kvm_vcpu *__hyp_running_vcpu;
>  };
>  
> +struct kvm_pmu_events {
> + u32 events_host;
> + u32 events_guest;
> +};
> +
>  struct kvm_host_data {
>   struct kvm_cpu_context host_ctxt;
> + struct kvm_pmu_events pmu_events;
>  };
>  
>  typedef struct kvm_host_data kvm_host_data_t;
> @@ -479,11 +485,22 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
>  void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
>  void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
>  
> +static inline bool kvm_pmu_counter_defered(struct perf_event_attr *attr)
> +{
> + return attr->exclude_host;
> +}
> +
>  #ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */
>  static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
>  {
>   return kvm_arch_vcpu_run_map_fp(vcpu);
>  }
> +
> +void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
> +void kvm_clr_pmu_events(u32 clr);
> +#else
> +static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) 
> {}
> +static inline void kvm_clr_pmu_events(u32 clr) {}
>  #endif
>  
>  static inline void kvm_arm_vhe_guest_enter(void)
> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> index 0f2a135ba15b..f34cb49b66ae 100644
> --- a/arch/arm64/kvm/Makefile
> +++ b/arch/arm64/kvm/Makefile
> @@ -19,7 +19,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o 
> $(KVM)/arm/perf.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o 
> sys_regs_generic_v8.o
> -kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o
> +kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o pmu.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o
>  
>  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o
> diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
> new file mode 100644
> index ..43965a3cc0f4
> --- /dev/null
> +++ b/arch/arm64/kvm/pmu.c
> @@ -0,0 +1,49 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * arch/arm64/kvm/pmu.c: Switching between guest and host counters
> + *
> + * Copyright 2019 Arm Limited
> + * Author: Andrew Murray 
> + */
> +#include 
> +#include 
> +
> +DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data);
> +
> +/*
> + * Given the exclude_{host,guest} attributes, determine if we are going
> + * to need to switch counters at guest entry/exit.
> + */
> +static bool kvm_pmu_switch_needed(struct perf_event_attr *attr)
> +{
> + /* Only switch if attributes are different */
> + return (attr->exclude_host ^ attr->exclude_guest);

Nit:

Is there any benefit to this rather than doing "attr->exclude_host !=
attr->exclude_guest" ? The code generated is most likely the same, I
just find the latter slightly more straightforward.

Cheers,

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH kvmtool 10/16] ref_cnt: Add simple ref counting API

2019-03-07 Thread Julien Thierry
Provide a simple API with a structure and functions for reference
counting. This is inspired by the Linux kref.
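
A typical usage sketch (illustrative only, not part of this patch;
assumes the usual kvmtool headers for container_of, malloc and free):

	struct mapping {
		struct ref_cnt	ref_cnt;
		/* payload... */
	};

	static void mapping_release(struct ref_cnt *ref_cnt)
	{
		free(container_of(ref_cnt, struct mapping, ref_cnt));
	}

	static void example(void)
	{
		struct mapping *m = malloc(sizeof(*m));

		ref_cnt_init(&m->ref_cnt);		/* creator holds a reference */
		ref_get(&m->ref_cnt);			/* extra user                */
		ref_put(&m->ref_cnt, mapping_release);	/* that user is done         */
		ref_put(&m->ref_cnt, mapping_release);	/* last ref: release called  */
	}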

Signed-off-by: Julien Thierry 
---
 include/kvm/ref_cnt.h | 53 +++
 1 file changed, 53 insertions(+)
 create mode 100644 include/kvm/ref_cnt.h

diff --git a/include/kvm/ref_cnt.h b/include/kvm/ref_cnt.h
new file mode 100644
index 000..1c8194c
--- /dev/null
+++ b/include/kvm/ref_cnt.h
@@ -0,0 +1,53 @@
+#ifndef KVM__REF_CNT_H
+#define KVM__REF_CNT_H
+
+#include "kvm/mutex.h"
+
+#ifdef __ATOMIC_ACQUIRE
+
+#define KVM_ATOMIC_ACQUIRE __ATOMIC_ACQUIRE
+#define KVM_ATOMIC_RELEASE __ATOMIC_RELEASE
+
+#define kvm_atomic_add_fetch(ptr, val, memorder)   \
+   __atomic_add_fetch((ptr), (val), (memorder))
+
+#define kvm_atomic_sub_fetch(ptr, val, memorder)   \
+   __atomic_sub_fetch((ptr), (val), (memorder))
+#else
+
+#define KVM_ATOMIC_ACQUIRE 0
+#define KVM_ATOMIC_RELEASE 0
+
+#define kvm_atomic_add_fetch(ptr, val, memorder)   \
+   __sync_add_and_fetch((ptr), (val))
+
+#define kvm_atomic_sub_fetch(ptr, val, memorder)   \
+   __sync_sub_and_fetch((ptr), (val))
+
+#endif
+
+struct ref_cnt {
+   int cnt;
+};
+
+#define REF_CNT_INIT (struct ref_cnt) { .cnt = 1 }
+
+static inline void ref_cnt_init(struct ref_cnt *ref_cnt)
+{
+   ref_cnt->cnt = 1;
+}
+
+static inline void ref_get(struct ref_cnt *ref_cnt)
+{
+   kvm_atomic_add_fetch(&ref_cnt->cnt, 1, KVM_ATOMIC_ACQUIRE);
+
+}
+
+static inline void ref_put(struct ref_cnt *ref_cnt,
+  void (*release)(struct ref_cnt *ref_cnt))
+{
+   if (!kvm_atomic_sub_fetch(&ref_cnt->cnt, 1, KVM_ATOMIC_RELEASE))
+   release(ref_cnt);
+}
+
+#endif /* KVM__REF_CNT_H */
-- 
1.9.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH kvmtool 11/16] mmio: Allow mmio callbacks to be called without locking

2019-03-07 Thread Julien Thierry
A vcpu might reconfigure some memory mapped region while other vcpus
access another one. Currently, an edit to one mmio region blocks all
other mmio accesses.

Execute mmio callbacks outside of the lock, protecting the mmio data
with reference counting so it cannot be deleted while a callback is
still running.
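
The resulting access pattern on the vcpu side is sketched below (the
real hunks follow): look the region up and take a reference under the
read lock, drop the lock, run the callback, then put the reference so
that a concurrent deregistration only frees the mapping once the
callback has finished.

	br_read_lock(kvm);
	mmio = mmio_search(&mmio_tree, phys_addr, len);
	if (mmio)
		ref_get(&mmio->ref_cnt);
	br_read_unlock(kvm);

	if (mmio) {
		mmio->mmio_fn(vcpu, phys_addr, data, len, is_write, mmio->ptr);
		ref_put(&mmio->ref_cnt, mmio_release);
	}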

Signed-off-by: Julien Thierry 
---
 mmio.c | 26 ++
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/mmio.c b/mmio.c
index 61e1d47..03e1a76 100644
--- a/mmio.c
+++ b/mmio.c
@@ -2,6 +2,7 @@
 #include "kvm/kvm-cpu.h"
 #include "kvm/rbtree-interval.h"
 #include "kvm/brlock.h"
+#include "kvm/ref_cnt.h"
 
 #include 
 #include 
@@ -19,10 +20,18 @@ struct mmio_mapping {
struct rb_int_node  node;
void(*mmio_fn)(struct kvm_cpu *vcpu, u64 addr, u8 
*data, u32 len, u8 is_write, void *ptr);
void*ptr;
+   struct ref_cnt  ref_cnt;
 };
 
 static struct rb_root mmio_tree = RB_ROOT;
 
+static void mmio_release(struct ref_cnt *ref_cnt)
+{
+   struct mmio_mapping * mmio = container_of(ref_cnt, struct mmio_mapping, 
ref_cnt);
+
+   free(mmio);
+}
+
 static struct mmio_mapping *mmio_search(struct rb_root *root, u64 addr, u64 
len)
 {
struct rb_int_node *node;
@@ -75,6 +84,7 @@ int kvm__register_mmio(struct kvm *kvm, u64 phys_addr, u64 
phys_addr_len, bool c
.node = RB_INT_INIT(phys_addr, phys_addr + phys_addr_len),
.mmio_fn = mmio_fn,
.ptr= ptr,
+   .ref_cnt = REF_CNT_INIT,
};
 
if (coalesce) {
@@ -89,6 +99,7 @@ int kvm__register_mmio(struct kvm *kvm, u64 phys_addr, u64 
phys_addr_len, bool c
}
}
br_write_lock(kvm);
+   /* Pass ref to tree, no need to put ref */
ret = mmio_insert(&mmio_tree, mmio);
br_write_unlock(kvm);
 
@@ -106,6 +117,7 @@ bool kvm__deregister_mmio(struct kvm *kvm, u64 phys_addr)
br_write_unlock(kvm);
return false;
}
+   /* Taking the ref from the tree */
 
zone = (struct kvm_coalesced_mmio_zone) {
.addr   = phys_addr,
@@ -114,9 +126,10 @@ bool kvm__deregister_mmio(struct kvm *kvm, u64 phys_addr)
ioctl(kvm->vm_fd, KVM_UNREGISTER_COALESCED_MMIO, &zone);
 
rb_int_erase(&mmio_tree, &mmio->node);
+   ref_put(&mmio->ref_cnt, mmio_release);
+
br_write_unlock(kvm);
 
-   free(mmio);
return true;
 }
 
@@ -127,15 +140,20 @@ bool kvm__emulate_mmio(struct kvm_cpu *vcpu, u64 
phys_addr, u8 *data, u32 len, u
br_read_lock(vcpu->kvm);
mmio = mmio_search(&mmio_tree, phys_addr, len);
 
-   if (mmio)
-   mmio->mmio_fn(vcpu, phys_addr, data, len, is_write, mmio->ptr);
-   else {
+   if (!mmio) {
+   br_read_unlock(vcpu->kvm);
if (vcpu->kvm->cfg.mmio_debug)
fprintf(stderr, "Warning: Ignoring MMIO %s at %016llx 
(length %u)\n",
to_direction(is_write),
(unsigned long long)phys_addr, len);
+   return true;
}
+
+   ref_get(&mmio->ref_cnt);
br_read_unlock(vcpu->kvm);
 
+   mmio->mmio_fn(vcpu, phys_addr, data, len, is_write, mmio->ptr);
+   ref_put(&mmio->ref_cnt, mmio_release);
+
return true;
 }
-- 
1.9.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH kvmtool 01/16] Makefile: Only compile vesa for archs that need it

2019-03-07 Thread Julien Thierry
The vesa framebuffer is only used by architectures that explicitly
require it (i.e. x86). Compile it out for architectures not using it, as
its current implementation might not work for them.

Signed-off-by: Julien Thierry 
---
 Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index c4faff6..288e467 100644
--- a/Makefile
+++ b/Makefile
@@ -94,7 +94,6 @@ OBJS  += util/read-write.o
 OBJS   += util/util.o
 OBJS   += virtio/9p.o
 OBJS   += virtio/9p-pdu.o
-OBJS   += hw/vesa.o
 OBJS   += hw/pci-shmem.o
 OBJS   += kvm-ipc.o
 OBJS   += builtin-sandbox.o
@@ -219,6 +218,8 @@ else
 endif
 
 ifeq (y,$(ARCH_HAS_FRAMEBUFFER))
+   OBJS+= hw/vesa.o
+
CFLAGS_GTK3 := $(shell pkg-config --cflags gtk+-3.0 2>/dev/null)
LDFLAGS_GTK3 := $(shell pkg-config --libs gtk+-3.0 2>/dev/null)
ifeq ($(call try-build,$(SOURCE_GTK3),$(CFLAGS) 
$(CFLAGS_GTK3),$(LDFLAGS) $(LDFLAGS_GTK3)),y)
-- 
1.9.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH kvmtool 05/16] ioport: pci: Move port allocations to PCI devices

2019-03-07 Thread Julien Thierry
The dynamic ioport allocation with IOPORT_EMPTY is currently only used
by PCI devices. Other devices use fixed ports for which they request
registration to the ioport API.

PCI ports need to be in the PCI IO space and there is no reason ioport
API should know a PCI port is being allocated and needs to be placed in
PCI IO space. This currently just happens to be the case.

Move the responsibility of dynamic allocation of ioports from the ioport
API to PCI.

In the future, if other types of devices also need dynamic ioport
allocation, they'll have to figure out the range of ports they are
allowed to use.
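
With this change, a PCI device allocates its ports itself and then
registers them, roughly like this (sketch; dev_io_ops stands for the
device's ioport_operations):

	u16 port = pci_get_io_port_block(IOPORT_SIZE);
	int r = ioport__register(kvm, port, &dev_io_ops, IOPORT_SIZE, NULL);
	if (r < 0)
		return r;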

Signed-off-by: Julien Thierry 
---
 hw/pci-shmem.c   |  3 ++-
 hw/vesa.c|  4 ++--
 include/kvm/ioport.h |  3 ---
 include/kvm/pci.h|  2 ++
 ioport.c | 18 --
 pci.c|  8 
 vfio/core.c  |  6 --
 virtio/pci.c |  3 ++-
 8 files changed, 20 insertions(+), 27 deletions(-)

diff --git a/hw/pci-shmem.c b/hw/pci-shmem.c
index f92bc75..a0c5ba8 100644
--- a/hw/pci-shmem.c
+++ b/hw/pci-shmem.c
@@ -357,7 +357,8 @@ int pci_shmem__init(struct kvm *kvm)
return 0;
 
/* Register MMIO space for MSI-X */
-   r = ioport__register(kvm, IOPORT_EMPTY, &shmem_pci__io_ops,
IOPORT_SIZE, NULL);
+   r = pci_get_io_port_block(IOPORT_SIZE);
+   r = ioport__register(kvm, r, &shmem_pci__io_ops, IOPORT_SIZE, NULL);
if (r < 0)
return r;
ivshmem_registers = (u16)r;
diff --git a/hw/vesa.c b/hw/vesa.c
index f3c5114..404a8a3 100644
--- a/hw/vesa.c
+++ b/hw/vesa.c
@@ -60,8 +60,8 @@ struct framebuffer *vesa__init(struct kvm *kvm)
 
if (!kvm->cfg.vnc && !kvm->cfg.sdl && !kvm->cfg.gtk)
return NULL;
-
-   r = ioport__register(kvm, IOPORT_EMPTY, &vesa_io_ops, IOPORT_SIZE, 
NULL);
+   r = pci_get_io_space_block(IOPORT_SIZE);
+   r = ioport__register(kvm, r, &vesa_io_ops, IOPORT_SIZE, NULL);
if (r < 0)
return ERR_PTR(r);
 
diff --git a/include/kvm/ioport.h b/include/kvm/ioport.h
index db52a47..b10fcd5 100644
--- a/include/kvm/ioport.h
+++ b/include/kvm/ioport.h
@@ -14,11 +14,8 @@
 
 /* some ports we reserve for own use */
 #define IOPORT_DBG 0xe0
-#define IOPORT_START   0x6200
 #define IOPORT_SIZE0x400
 
-#define IOPORT_EMPTY   USHRT_MAX
-
 struct kvm;
 
 struct ioport {
diff --git a/include/kvm/pci.h b/include/kvm/pci.h
index a86c15a..bdbd183 100644
--- a/include/kvm/pci.h
+++ b/include/kvm/pci.h
@@ -19,6 +19,7 @@
 #define PCI_CONFIG_DATA0xcfc
 #define PCI_CONFIG_BUS_FORWARD 0xcfa
 #define PCI_IO_SIZE0x100
+#define PCI_IOPORT_START   0x6200
 #define PCI_CFG_SIZE   (1ULL << 24)
 
 struct kvm;
@@ -153,6 +154,7 @@ int pci__init(struct kvm *kvm);
 int pci__exit(struct kvm *kvm);
 struct pci_device_header *pci__find_dev(u8 dev_num);
 u32 pci_get_io_space_block(u32 size);
+u16 pci_get_io_port_block(u32 size);
 void pci__assign_irq(struct device_header *dev_hdr);
 void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void 
*data, int size);
 void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void 
*data, int size);
diff --git a/ioport.c b/ioport.c
index a6dc65e..a72e403 100644
--- a/ioport.c
+++ b/ioport.c
@@ -16,24 +16,8 @@
 
 #define ioport_node(n) rb_entry(n, struct ioport, node)
 
-DEFINE_MUTEX(ioport_mutex);
-
-static u16 free_io_port_idx; /* protected by ioport_mutex 
*/
-
 static struct rb_root  ioport_tree = RB_ROOT;
 
-static u16 ioport__find_free_port(void)
-{
-   u16 free_port;
-
-   mutex_lock(&ioport_mutex);
-   free_port = IOPORT_START + free_io_port_idx * IOPORT_SIZE;
-   free_io_port_idx++;
-   mutex_unlock(&ioport_mutex);
-
-   return free_port;
-}
-
 static struct ioport *ioport_search(struct rb_root *root, u64 addr)
 {
struct rb_int_node *node;
@@ -85,8 +69,6 @@ int ioport__register(struct kvm *kvm, u16 port, struct 
ioport_operations *ops, i
int r;
 
br_write_lock(kvm);
-   if (port == IOPORT_EMPTY)
-   port = ioport__find_free_port();
 
entry = ioport_search(&ioport_tree, port);
if (entry) {
diff --git a/pci.c b/pci.c
index 9edefa5..cd749db 100644
--- a/pci.c
+++ b/pci.c
@@ -19,6 +19,14 @@ static u32 pci_config_address_bits;
  * PCI isn't currently supported.)
  */
 static u32 io_space_blocks = KVM_PCI_MMIO_AREA;
+static u16 io_port_blocks  = PCI_IOPORT_START;
+
+u16 pci_get_io_port_block(u32 size)
+{
+   u16 port = ALIGN(io_port_blocks, IOPORT_SIZE);
+   io_port_blocks = port + size;
+   return port;
+}
 
 /*
  * BARs must be naturally aligned, so enforce this in the allocator.
diff --git a/vfio/core.c b/vfio/core.c
index 17b5b0c..0ed1e6f 100644
--- a/vfio/core.c
+++ b/vfio/core.c
@@ -202,8 +202,10 @@ static int vfio_setup_trap_region(struc

[PATCH kvmtool 04/16] pci: Fix BAR resource sizing arbitration

2019-03-07 Thread Julien Thierry
From: Sami Mujawar 

According to the 'PCI Local Bus Specification, Revision 3.0,
February 3, 2004, Section 6.2.5.1, Implementation Notes, page 227'

"Software saves the original value of the Base Address register,
writes 0FFFFFFFFh to the register, then reads it back. Size
calculation can be done from the 32-bit value read by first
clearing encoding information bits (bit 0 for I/O, bits 0-3 for
memory), inverting all 32 bits (logical NOT), then incrementing
by 1. The resultant 32-bit value is the memory/I/O range size
decoded by the register. Note that the upper 16 bits of the result
is ignored if the Base Address register is for I/O and bits 16-31
returned zero upon read."

kvmtool was returning the actual BAR resource size, which is incorrect
as the software drivers would invert all 32 bits (logical NOT), then
increment by 1. This ends up with a very large
resource size (in some cases more than 4GB) due to which drivers
assert/fail to work.

e.g. if the BAR resource size was 0x1000, kvmtool would return 0x1000
instead of 0xF00x.

Fixed pci__config_wr() to return the size of the BAR in accordance with
the PCI Local Bus specification, Implementation Notes.
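
From the guest's point of view, the sizing sequence described above
looks roughly like this for a memory BAR (sketch; read_bar/write_bar
are placeholders for config space accessors):

	u32 orig = read_bar(bar);
	write_bar(bar, 0xFFFFFFFF);
	u32 val  = read_bar(bar);	/* e.g. 0xFFFFF000 for a 4K BAR   */
	u32 size = ~(val & ~0xF) + 1;	/* clear info bits, NOT, then + 1 */
	write_bar(bar, orig);		/* restore the original address   */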

Signed-off-by: Sami Mujawar 
Signed-off-by: Julien Thierry 
---
 pci.c | 51 ---
 1 file changed, 48 insertions(+), 3 deletions(-)

diff --git a/pci.c b/pci.c
index 689869c..9edefa5 100644
--- a/pci.c
+++ b/pci.c
@@ -8,6 +8,9 @@
 #include 
 #include 
 
+/* Macro to check that a value is a power of 2 */
+#define power_of_2(pow) (((pow) != 0) && (((pow) & ((pow) - 1)) == 0))
+
 static u32 pci_config_address_bits;
 
 /* This is within our PCI gap - in an unused area.
@@ -173,9 +176,51 @@ void pci__config_wr(struct kvm *kvm, union 
pci_config_address addr, void *data,
 * BAR there next time it reads from it. When the kernel got the size it
 * would write the address back.
 */
-   if (bar < 6 && ioport__read32(data) == 0xFFFFFFFF) {
-   u32 sz = pci_hdr->bar_size[bar];
-   memcpy(base + offset, &sz, sizeof(sz));
+   if (bar < 6) {
+   /*
+* According to the PCI local bus specification REV 3.0:
+* The number of upper bits that a device actually implements
+* depends on how much of the address space the device will
+* respond to. A device that wants a 1 MB memory address space
+* (using a 32-bit base address register) would build the top
+* 12 bits of the address register, hardwiring the other bits
+* to 0.
+* Furthermore software can determine how much address space the
+* device requires by writing a value of all 1's to the register
+* and then reading the value back. The device will return 0's 
in
+* all don't-care address bits, effectively specifying the 
address
+* space required.
+*
+* The following code emulates this by storing the value written
+* to the BAR, applying the size mask to clear the lower bits,
+* restoring the information bits and then updating the BAR 
value.
+*/
+   u32 bar_value;
+   /* Get the size mask */
+   u32 sz = ~(pci_hdr->bar_size[bar] - 1);
+   /* Extract the info bits */
+   u32 info = pci_hdr->bar[bar] & 0xF;
+
+   /* Store the value written by software */
+   memcpy(base + offset, data, size);
+
+   /* Apply the size mask to the bar value to clear the lower bits 
*/
+   bar_value = pci_hdr->bar[bar] & sz;
+
+   /* Warn if the bar size is not a power of 2 */
+   WARN_ON(!power_of_2(pci_hdr->bar_size[bar]));
+
+   /* Restore the info bits */
+   if ((info & 0x1) == 0x1) {
+   /* BAR for I/O */
+   bar_value = ((bar_value & ~0x3) | 0x1);
+   } else {
+   /* BAR for Memory */
+   bar_value = ((bar_value & ~0xF) | info);
+   }
+
+   /* Store the final BAR value */
+   pci_hdr->bar[bar] = bar_value;
} else {
memcpy(base + offset, data, size);
}
-- 
1.9.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH kvmtool 08/16] arm/pci: Do not use first PCI IO space bytes for devices

2019-03-07 Thread Julien Thierry
Linux has a convention that the lower 0x1000 bytes of the PCI IO space
should not be used (cf. PCIBIOS_MIN_IO).

Just allocate those bytes up front to prevent future allocations from
assigning them to devices.

Signed-off-by: Julien Thierry 
---
 arm/pci.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arm/pci.c b/arm/pci.c
index 83238ca..559e0cf 100644
--- a/arm/pci.c
+++ b/arm/pci.c
@@ -37,6 +37,9 @@ void pci__arm_init(struct kvm *kvm)
 
/* Make PCI port allocation start at a properly aligned address */
pci_get_io_space_block(align_pad);
+
+   /* Convention, don't allocate first 0x1000 bytes of PCI IO */
+   pci_get_io_space_block(0x1000);
 }
 
 void pci__generate_fdt_nodes(void *fdt)
-- 
1.9.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH kvmtool 13/16] vfio: Add support for BAR configuration

2019-03-07 Thread Julien Thierry
When a guest can reassign BARs, kvmtool needs to keep each vfio_region
consistent with its corresponding BAR. Take the updated addresses from
the PCI header read back from the vfio driver.

Also, to modify the BARs, guests are expected to disable the IO/Memory
response bits in the PCI command register. Support this by
mapping/unmapping regions when the corresponding response gets
enabled/disabled.

Signed-off-by: Julien Thierry 
---
 vfio/core.c |  8 +++---
 vfio/pci.c  | 82 ++---
 2 files changed, 82 insertions(+), 8 deletions(-)

diff --git a/vfio/core.c b/vfio/core.c
index 0ed1e6f..b554897 100644
--- a/vfio/core.c
+++ b/vfio/core.c
@@ -202,14 +202,13 @@ static int vfio_setup_trap_region(struct kvm *kvm, struct 
vfio_device *vdev,
  struct vfio_region *region)
 {
if (region->is_ioport) {
-   int port = pci_get_io_port_block(region->info.size);
+   int port = ioport__register(kvm, region->port_base,
+   &vfio_ioport_ops,
+   region->info.size, region);
 
-   port = ioport__register(kvm, port, &vfio_ioport_ops,
-   region->info.size, region);
if (port < 0)
return port;
 
-   region->port_base = port;
return 0;
}
 
@@ -258,6 +257,7 @@ void vfio_unmap_region(struct kvm *kvm, struct vfio_region 
*region)
 {
if (region->host_addr) {
munmap(region->host_addr, region->info.size);
+   region->host_addr = NULL;
} else if (region->is_ioport) {
ioport__unregister(kvm, region->port_base);
} else {
diff --git a/vfio/pci.c b/vfio/pci.c
index 03de3c1..474f1c1 100644
--- a/vfio/pci.c
+++ b/vfio/pci.c
@@ -1,3 +1,4 @@
+#include "kvm/ioport.h"
 #include "kvm/irq.h"
 #include "kvm/kvm.h"
 #include "kvm/kvm-cpu.h"
@@ -452,6 +453,64 @@ static void vfio_pci_cfg_read(struct kvm *kvm, struct 
pci_device_header *pci_hdr
  sz, offset);
 }
 
+static void vfio_pci_cfg_handle_command(struct kvm *kvm, struct vfio_device 
*vdev,
+   void *data, int sz)
+{
+   struct pci_device_header *hdr = &vdev->pci.hdr;
+   bool switch_io;
+   bool switch_mem;
+   u16 cmd;
+   int i;
+
+   cmd = ioport__read16(data);
+   switch_io = !!((cmd ^ hdr->command) & PCI_COMMAND_IO);
+   switch_mem = !!((cmd ^ hdr->command) & PCI_COMMAND_MEMORY);
+
+   for (i = VFIO_PCI_BAR0_REGION_INDEX; i <= VFIO_PCI_BAR5_REGION_INDEX; 
++i) {
+   struct vfio_region *region = &vdev->regions[i];
+
+   if (region->is_ioport && switch_io) {
+   if (cmd & PCI_COMMAND_IO)
+   vfio_map_region(kvm, vdev, region);
+   else
+   vfio_unmap_region(kvm, region);
+   }
+
+   if (!region->is_ioport && switch_mem) {
+   if (cmd & PCI_COMMAND_MEMORY)
+   vfio_map_region(kvm, vdev, region);
+   else
+   vfio_unmap_region(kvm, region);
+   }
+   }
+}
+
+static void vfio_pci_cfg_update_bar(struct kvm *kvm, struct vfio_device *vdev, 
int bar)
+{
+   struct pci_device_header *hdr = &vdev->pci.hdr;
+   struct vfio_region *bar_region;
+
+   bar_region = &vdev->regions[bar + VFIO_PCI_BAR0_REGION_INDEX];
+
+   if (bar_region->is_ioport) {
+   if (hdr->command & PCI_COMMAND_IO)
+   vfio_unmap_region(kvm, bar_region);
+
+   bar_region->port_base = hdr->bar[bar] & 
PCI_BASE_ADDRESS_IO_MASK;
+
+   if (hdr->command & PCI_COMMAND_IO)
+   vfio_map_region(kvm, vdev, bar_region);
+   } else {
+   if (hdr->command & PCI_COMMAND_MEMORY)
+   vfio_unmap_region(kvm, bar_region);
+
+   bar_region->guest_phys_addr = hdr->bar[bar] & 
PCI_BASE_ADDRESS_MEM_MASK;
+
+   if (hdr->command & PCI_COMMAND_MEMORY)
+   vfio_map_region(kvm, vdev, bar_region);
+   }
+}
+
 static void vfio_pci_cfg_write(struct kvm *kvm, struct pci_device_header 
*pci_hdr,
   u8 offset, void *data, int sz)
 {
@@ -475,9 +534,15 @@ static void vfio_pci_cfg_write(struct kvm *kvm, struct 
pci_device_header *pci_hd
if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSI)
vfio_pci_msi_cap_write(kvm, vdev, offset, data, sz);
 
+   if (offset == PCI_COMMAND)
+   vfio_pci_cfg_handle_command(kvm, vdev, data, sz);
+
if (pread(

[PATCH kvmtool 07/16] arm/pci: Fix PCI IO region

2019-03-07 Thread Julien Thierry
The PCI IO region currently exposed through the DT contains ports that
are reserved by non-PCI devices.

Use the proper PCI IO start so that the region exposed through DT can
actually be used to reassign device BARs.

Signed-off-by: Julien Thierry 
---
 arm/include/arm-common/pci.h |  1 +
 arm/kvm.c|  3 +++
 arm/pci.c| 21 ++---
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/arm/include/arm-common/pci.h b/arm/include/arm-common/pci.h
index 9008a0e..aea42b8 100644
--- a/arm/include/arm-common/pci.h
+++ b/arm/include/arm-common/pci.h
@@ -1,6 +1,7 @@
 #ifndef ARM_COMMON__PCI_H
 #define ARM_COMMON__PCI_H
 
+void pci__arm_init(struct kvm *kvm);
 void pci__generate_fdt_nodes(void *fdt);
 
 #endif /* ARM_COMMON__PCI_H */
diff --git a/arm/kvm.c b/arm/kvm.c
index b824f63..44d7796 100644
--- a/arm/kvm.c
+++ b/arm/kvm.c
@@ -6,6 +6,7 @@
 #include "kvm/fdt.h"
 
 #include "arm-common/gic.h"
+#include "arm-common/pci.h"
 
 #include 
 #include 
@@ -86,6 +87,8 @@ void kvm__arch_init(struct kvm *kvm, const char 
*hugetlbfs_path, u64 ram_size)
/* Create the virtual GIC. */
if (gic__create(kvm, kvm->cfg.arch.irqchip))
die("Failed to create virtual GIC");
+
+   pci__arm_init(kvm);
 }
 
 #define FDT_ALIGN  SZ_2M
diff --git a/arm/pci.c b/arm/pci.c
index 557cfa9..83238ca 100644
--- a/arm/pci.c
+++ b/arm/pci.c
@@ -1,3 +1,5 @@
+#include "linux/sizes.h"
+
 #include "kvm/devices.h"
 #include "kvm/fdt.h"
 #include "kvm/kvm.h"
@@ -7,6 +9,11 @@
 
 #include "arm-common/pci.h"
 
+#define ARM_PCI_IO_START ALIGN(PCI_IOPORT_START, SZ_4K)
+
+/* Must be a multiple of 4k */
+#define ARM_PCI_IO_SIZE ((ARM_MMIO_AREA - ARM_PCI_IO_START) & ~(SZ_4K - 1))
+
 /*
  * An entry in the interrupt-map table looks like:
  *
@@ -24,6 +31,14 @@ struct of_interrupt_map_entry {
struct of_gic_irq   gic_irq;
 } __attribute__((packed));
 
+void pci__arm_init(struct kvm *kvm)
+{
+   u32 align_pad = ARM_PCI_IO_START - PCI_IOPORT_START;
+
+   /* Make PCI port allocation start at a properly aligned address */
+   pci_get_io_space_block(align_pad);
+}
+
 void pci__generate_fdt_nodes(void *fdt)
 {
struct device_header *dev_hdr;
@@ -40,10 +55,10 @@ void pci__generate_fdt_nodes(void *fdt)
.pci_addr = {
.hi = 
cpu_to_fdt32(of_pci_b_ss(OF_PCI_SS_IO)),
.mid= 0,
-   .lo = 0,
+   .lo = cpu_to_fdt32(ARM_PCI_IO_START),
},
-   .cpu_addr   = cpu_to_fdt64(KVM_IOPORT_AREA),
-   .length = cpu_to_fdt64(ARM_IOPORT_SIZE),
+   .cpu_addr   = cpu_to_fdt64(ARM_PCI_IO_START),
+   .length = cpu_to_fdt64(ARM_PCI_IO_SIZE),
},
{
.pci_addr = {
-- 
1.9.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH kvmtool 06/16] pci: Fix ioport allocation size

2019-03-07 Thread Julien Thierry
The PCI Local Bus Specification, Rev. 3.0,
Section 6.2.5.1. "Address Maps" states:
"Devices that map control functions into I/O Space must not consume more
than 256 bytes per I/O Base Address register."

Yet all the PCI devices allocate IO ports of IOPORT_SIZE (= 1024 bytes).

Fix this by having PCI devices use 256-byte port ranges for their IO BARs.

Signed-off-by: Julien Thierry 
---
 hw/pci-shmem.c   |  4 ++--
 hw/vesa.c|  4 ++--
 include/kvm/ioport.h |  1 -
 pci.c|  2 +-
 virtio/pci.c | 14 +++---
 5 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/hw/pci-shmem.c b/hw/pci-shmem.c
index a0c5ba8..2e1474b 100644
--- a/hw/pci-shmem.c
+++ b/hw/pci-shmem.c
@@ -357,8 +357,8 @@ int pci_shmem__init(struct kvm *kvm)
return 0;
 
/* Register MMIO space for MSI-X */
-   r = pci_get_io_port_block(IOPORT_SIZE);
-   r = ioport__register(kvm, r, &shmem_pci__io_ops, IOPORT_SIZE, NULL);
+   r = pci_get_io_port_block(PCI_IO_SIZE);
+   r = ioport__register(kvm, r, &shmem_pci__io_ops, PCI_IO_SIZE, NULL);
if (r < 0)
return r;
ivshmem_registers = (u16)r;
diff --git a/hw/vesa.c b/hw/vesa.c
index 404a8a3..71935d5 100644
--- a/hw/vesa.c
+++ b/hw/vesa.c
@@ -60,8 +60,8 @@ struct framebuffer *vesa__init(struct kvm *kvm)
 
if (!kvm->cfg.vnc && !kvm->cfg.sdl && !kvm->cfg.gtk)
return NULL;
-   r = pci_get_io_space_block(IOPORT_SIZE);
-   r = ioport__register(kvm, r, &vesa_io_ops, IOPORT_SIZE, NULL);
+   r = pci_get_io_space_block(PCI_IO_SIZE);
+   r = ioport__register(kvm, r, &vesa_io_ops, PCI_IO_SIZE, NULL);
if (r < 0)
return ERR_PTR(r);
 
diff --git a/include/kvm/ioport.h b/include/kvm/ioport.h
index b10fcd5..8c86b71 100644
--- a/include/kvm/ioport.h
+++ b/include/kvm/ioport.h
@@ -14,7 +14,6 @@
 
 /* some ports we reserve for own use */
 #define IOPORT_DBG 0xe0
-#define IOPORT_SIZE0x400
 
 struct kvm;
 
diff --git a/pci.c b/pci.c
index cd749db..228a628 100644
--- a/pci.c
+++ b/pci.c
@@ -23,7 +23,7 @@ static u16 io_port_blocks = PCI_IOPORT_START;
 
 u16 pci_get_io_port_block(u32 size)
 {
-   u16 port = ALIGN(io_port_blocks, IOPORT_SIZE);
+   u16 port = ALIGN(io_port_blocks, PCI_IO_SIZE);
io_port_blocks = port + size;
return port;
 }
diff --git a/virtio/pci.c b/virtio/pci.c
index c8e16dd..5a6c0d0 100644
--- a/virtio/pci.c
+++ b/virtio/pci.c
@@ -406,7 +406,7 @@ static void virtio_pci__io_mmio_callback(struct kvm_cpu 
*vcpu,
 {
struct virtio_pci *vpci = ptr;
int direction = is_write ? KVM_EXIT_IO_OUT : KVM_EXIT_IO_IN;
-   u16 port = vpci->port_addr + (addr & (IOPORT_SIZE - 1));
+   u16 port = vpci->port_addr + (addr & (PCI_IO_SIZE - 1));
 
kvm__emulate_io(vcpu, port, data, direction, len, 1);
 }
@@ -420,14 +420,14 @@ int virtio_pci__init(struct kvm *kvm, void *dev, struct 
virtio_device *vdev,
vpci->kvm = kvm;
vpci->dev = dev;
 
-   r = pci_get_io_port_block(IOPORT_SIZE);
-   r = ioport__register(kvm, r, &virtio_pci__io_ops, IOPORT_SIZE, vdev);
+   r = pci_get_io_port_block(PCI_IO_SIZE);
+   r = ioport__register(kvm, r, &virtio_pci__io_ops, PCI_IO_SIZE, vdev);
if (r < 0)
return r;
vpci->port_addr = (u16)r;
 
-   vpci->mmio_addr = pci_get_io_space_block(IOPORT_SIZE);
-   r = kvm__register_mmio(kvm, vpci->mmio_addr, IOPORT_SIZE, false,
+   vpci->mmio_addr = pci_get_io_space_block(PCI_IO_SIZE);
+   r = kvm__register_mmio(kvm, vpci->mmio_addr, PCI_IO_SIZE, false,
   virtio_pci__io_mmio_callback, vpci);
if (r < 0)
goto free_ioport;
@@ -457,8 +457,8 @@ int virtio_pci__init(struct kvm *kvm, void *dev, struct 
virtio_device *vdev,
| 
PCI_BASE_ADDRESS_SPACE_MEMORY),
.status = cpu_to_le16(PCI_STATUS_CAP_LIST),
.capabilities   = (void *)&vpci->pci_hdr.msix - (void *)&vpci->pci_hdr,
-   .bar_size[0]= cpu_to_le32(IOPORT_SIZE),
-   .bar_size[1]= cpu_to_le32(IOPORT_SIZE),
+   .bar_size[0]= cpu_to_le32(PCI_IO_SIZE),
+   .bar_size[1]= cpu_to_le32(PCI_IO_SIZE),
.bar_size[2]= cpu_to_le32(PCI_IO_SIZE*2),
};
 
-- 
1.9.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH kvmtool 09/16] brlock: Use rwlock instead of pause

2019-03-07 Thread Julien Thierry
Pausing all vcpus when reconfiguring something at run time is a big
overhead. Use an rwlock instead, so that vcpus which are not accessing
the resources being reconfigured can continue running.
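
Usage for the callers is unchanged (sketch): vcpu threads take the
lock for reading around look-ups, while reconfiguration takes it for
writing, instead of pausing every vcpu for the whole update.

	/* vcpu thread: ioport/MMIO access */
	br_read_lock(kvm);
	/* ... look up the region (and take a reference to it) ... */
	br_read_unlock(kvm);

	/* reconfiguration (e.g. BAR reprogramming) */
	br_write_lock(kvm);
	/* ... insert/remove the region ... */
	br_write_unlock(kvm);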

Signed-off-by: Julien Thierry 
---
 include/kvm/brlock.h | 11 ---
 include/kvm/kvm.h|  2 --
 2 files changed, 13 deletions(-)

diff --git a/include/kvm/brlock.h b/include/kvm/brlock.h
index 1862210..7d81056 100644
--- a/include/kvm/brlock.h
+++ b/include/kvm/brlock.h
@@ -17,8 +17,6 @@
 #define barrier()  __asm__ __volatile__("": : :"memory")
 #endif
 
-#ifdef KVM_BRLOCK_DEBUG
-
 #include "kvm/rwsem.h"
 
 #define br_read_lock(kvm)  down_read(&(kvm)->brlock_sem);
@@ -27,13 +25,4 @@
 #define br_write_lock(kvm) down_write(&(kvm)->brlock_sem);
 #define br_write_unlock(kvm)   up_write(&(kvm)->brlock_sem);
 
-#else
-
-#define br_read_lock(kvm)  barrier()
-#define br_read_unlock(kvm)barrier()
-
-#define br_write_lock(kvm) kvm__pause(kvm)
-#define br_write_unlock(kvm)   kvm__continue(kvm)
-#endif
-
 #endif
diff --git a/include/kvm/kvm.h b/include/kvm/kvm.h
index 7a73818..2f1679e 100644
--- a/include/kvm/kvm.h
+++ b/include/kvm/kvm.h
@@ -82,9 +82,7 @@ struct kvm {
 
int vm_state;
 
-#ifdef KVM_BRLOCK_DEBUG
pthread_rwlock_tbrlock_sem;
-#endif
 };
 
 void kvm__set_dir(const char *fmt, ...);
-- 
1.9.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH kvmtool 00/16] Support PCI BAR configuration

2019-03-07 Thread Julien Thierry
Hi,

This series adds support for guests writing to PCI BARs. Edk2 does this
and is not aware of the "linux,pci-probe-only" property in the chosen node.

- Patches 1 to 3 do miscellaneous fixes
- Patch 4 fixes the way we deal with BAR sizing
- Patches 5 to 8 fix the allocation/assignment of PCI IO BARs
- Patches 9 to 12 make it possible to remap ioport and mmio regions
  from vcpu threads, without pausing the entire VM
- Patches 13 to 16 add support for writing to BARs

Cheers,

Julien

-->

Julien Thierry (15):
  Makefile: Only compile vesa for archs that need it
  brlock: Always pass argument to br_read_lock/unlock
  brlock: fix build with KVM_BRLOCK_DEBUG
  ioport: pci: Move port allocations to PCI devices
  pci: Fix ioport allocation size
  arm/pci: Fix PCI IO region
  arm/pci: Do not use first PCI IO space bytes for devices
  brlock: Use rwlock instead of pause
  ref_cnt: Add simple ref counting API
  mmio: Allow mmio callbacks to be called without locking
  ioport: Allow ioport callbacks to be called without locking
  vfio: Add support for BAR configuration
  virtio/pci: Make memory and IO BARs independent
  virtio/pci: update virtio mapping when PCI BARs are reconfigured
  arm/fdt: Remove PCI probe only property

Sami Mujawar (1):
  pci: Fix BAR resource sizing arbitration

 Makefile |   3 +-
 arm/fdt.c|   1 -
 arm/include/arm-common/pci.h |   1 +
 arm/kvm.c|   3 +
 arm/pci.c|  24 -
 hw/pci-shmem.c   |   3 +-
 hw/vesa.c|   4 +-
 include/kvm/brlock.h |  21 +---
 include/kvm/ioport.h |   5 -
 include/kvm/kvm.h|   2 +
 include/kvm/pci.h|   2 +
 include/kvm/ref_cnt.h|  53 ++
 include/kvm/virtio-pci.h |   1 +
 ioport.c |  80 
 kvm.c|   4 +
 mmio.c   |  30 --
 pci.c|  59 +++-
 vfio/core.c  |   6 +-
 vfio/pci.c   |  82 +++-
 virtio/pci.c | 223 ---
 20 files changed, 491 insertions(+), 116 deletions(-)
 create mode 100644 include/kvm/ref_cnt.h

--
1.9.1
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH kvmtool 15/16] virtio/pci: update virtio mapping when PCI BARs are reconfigured

2019-03-07 Thread Julien Thierry
Software can change the addresses of PCI BARs. In the case of virtio, the
BARs are associated with some IO ports or mmio regions. These are not
updated when the guest modifies PCI BARs, leading to some surprises.

Re-register the ports and mmio regions related to PCI BARs when they are
updated.

Signed-off-by: Julien Thierry 
---
 include/kvm/virtio-pci.h |   1 +
 virtio/pci.c | 153 +++
 2 files changed, 144 insertions(+), 10 deletions(-)

diff --git a/include/kvm/virtio-pci.h b/include/kvm/virtio-pci.h
index b70cadd..37ffe02 100644
--- a/include/kvm/virtio-pci.h
+++ b/include/kvm/virtio-pci.h
@@ -23,6 +23,7 @@ struct virtio_pci {
struct device_headerdev_hdr;
void*dev;
struct kvm  *kvm;
+   struct virtio_device*vdev;
 
u16 port_addr;
u32 mmio_addr;
diff --git a/virtio/pci.c b/virtio/pci.c
index 32f9824..1275d82 100644
--- a/virtio/pci.c
+++ b/virtio/pci.c
@@ -434,6 +434,132 @@ static void virtio_pci__io_mmio_callback(struct kvm_cpu 
*vcpu,
 data, len);
 }
 
+static inline int virtio_pci__register_io(struct kvm *kvm,
+ struct virtio_pci *vpci)
+{
+   int r;
+
+   r = ioport__register(kvm, vpci->port_addr, &virtio_pci__io_ops,
+PCI_IO_SIZE, vpci->vdev);
+   if (r < 0)
+   pr_warning("failed to register io port virtio_pci bar[0]: 0x%x, 
err: %d\n",
+  (u32) vpci->port_addr, r);
+
+   return r;
+}
+
+static inline int virtio_pci__register_mmio(struct kvm *kvm,
+   struct virtio_pci *vpci)
+{
+   int r;
+
+   r = kvm__register_mmio(kvm, vpci->mmio_addr, PCI_IO_SIZE, false,
+  virtio_pci__io_mmio_callback, vpci->vdev);
+   if (r < 0)
+   pr_warning("failed to register mmio virtio_pci bar[1]: 0x%x, 
err: %d\n",
+  vpci->mmio_addr, r);
+
+   return r;
+}
+
+static inline int virtio_pci__register_msix(struct kvm *kvm,
+   struct virtio_pci *vpci)
+{
+   int r;
+
+   r = kvm__register_mmio(kvm, vpci->msix_io_block,
+  PCI_IO_SIZE * 2, false,
+  virtio_pci__msix_mmio_callback,
+  vpci->vdev);
+   if (r < 0)
+   pr_warning("failed to register mmio virtio_pci bar[2]: 0x%x, 
err: %d\n",
+  vpci->msix_io_block, r);
+
+   return r;
+}
+
+static void virtio_pci__config_write(struct kvm *kvm,
+struct pci_device_header *pci_hdr,
+u8 offset, void *data, int sz)
+{
+   struct virtio_pci *vpci;
+
+   vpci = container_of(pci_hdr, struct virtio_pci, pci_hdr);
+
+   switch (offset) {
+   case PCI_COMMAND:
+   {
+   u16 cmd;
+
+   if (sz != 2)
+   die("unsupported size for pci command access");
+
+   cmd = ioport__read16(data);
+
+   /* Enable I/O response? */
+   if (cmd & PCI_COMMAND_IO
+   && !(pci_hdr->command & PCI_COMMAND_IO))
+   virtio_pci__register_io(kvm, vpci);
+
+   /* Enable mmio response? */
+   if (cmd & PCI_COMMAND_MEMORY
+   && !(pci_hdr->command & PCI_COMMAND_MEMORY)) {
+   virtio_pci__register_mmio(kvm, vpci);
+   virtio_pci__register_msix(kvm, vpci);
+   }
+
+   /* Disable mmio response? */
+   if (!(cmd & PCI_COMMAND_MEMORY)
+  && pci_hdr->command & PCI_COMMAND_MEMORY) {
+   kvm__deregister_mmio(kvm, vpci->msix_io_block);
+   kvm__deregister_mmio(kvm, vpci->mmio_addr);
+   }
+
+   /* Disable I/O response? */
+   if (!(cmd & PCI_COMMAND_IO)
+   && pci_hdr->command & PCI_COMMAND_IO)
+   ioport__unregister(kvm, vpci->port_addr);
+
+   break;
+   }
+   case PCI_BASE_ADDRESS_0:
+   if (sz != 4)
+   die("unsupported size for pci bar[0] access");
+
+   if (pci_hdr->command & PCI_COMMAND_IO)
+   ioport__unregister(kvm, vpci->port_addr);
+
+   vpci->port_addr = ioport__read32(data) & 0x;
+
+   vpci->port_addr &= PCI_BASE_ADDRESS_IO_MASK;
+
+   if (pci_hdr->command & PCI_COMMAND_IO)
+   virtio_pci__re

[PATCH kvmtool 02/16] brlock: Always pass argument to br_read_lock/unlock

2019-03-07 Thread Julien Thierry
The kvm argument is not passed to br_read_lock/unlock. This happens to
work with the barrier-based implementation because the argument is
unused, but it breaks as soon as another lock implementation is used.
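
For reference, the two implementations look roughly like this (see
include/kvm/brlock.h):

	/* default (barrier-based) variant: the argument is ignored */
	#define br_read_lock(kvm)	barrier()
	#define br_read_unlock(kvm)	barrier()

	/* KVM_BRLOCK_DEBUG (rwsem-based) variant: the argument is dereferenced */
	#define br_read_lock(kvm)	down_read(&(kvm)->brlock_sem);
	#define br_read_unlock(kvm)	up_read(&(kvm)->brlock_sem);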

Signed-off-by: Julien Thierry 
---
 ioport.c | 4 ++--
 mmio.c   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ioport.c b/ioport.c
index 505e822..a6dc65e 100644
--- a/ioport.c
+++ b/ioport.c
@@ -184,7 +184,7 @@ bool kvm__emulate_io(struct kvm_cpu *vcpu, u16 port, void 
*data, int direction,
void *ptr = data;
struct kvm *kvm = vcpu->kvm;
 
-   br_read_lock();
+   br_read_lock(kvm);
	entry = ioport_search(&ioport_tree, port);
if (!entry)
goto out;
@@ -201,7 +201,7 @@ bool kvm__emulate_io(struct kvm_cpu *vcpu, u16 port, void 
*data, int direction,
}
 
 out:
-   br_read_unlock();
+   br_read_unlock(kvm);
 
if (ret)
return true;
diff --git a/mmio.c b/mmio.c
index c648bec..61e1d47 100644
--- a/mmio.c
+++ b/mmio.c
@@ -124,7 +124,7 @@ bool kvm__emulate_mmio(struct kvm_cpu *vcpu, u64 phys_addr, 
u8 *data, u32 len, u
 {
struct mmio_mapping *mmio;
 
-   br_read_lock();
+   br_read_lock(vcpu->kvm);
	mmio = mmio_search(&mmio_tree, phys_addr, len);
 
if (mmio)
@@ -135,7 +135,7 @@ bool kvm__emulate_mmio(struct kvm_cpu *vcpu, u64 phys_addr, 
u8 *data, u32 len, u
to_direction(is_write),
(unsigned long long)phys_addr, len);
}
-   br_read_unlock();
+   br_read_unlock(vcpu->kvm);
 
return true;
 }
-- 
1.9.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH kvmtool 16/16] arm/fdt: Remove PCI probe only property

2019-03-07 Thread Julien Thierry
PCI devices now support BAR reassignment. Get rid of the
"linux,pci-probe-only" property, which is no longer needed.

Signed-off-by: Julien Thierry 
---
 arm/fdt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arm/fdt.c b/arm/fdt.c
index 980015b..219248e 100644
--- a/arm/fdt.c
+++ b/arm/fdt.c
@@ -140,7 +140,6 @@ static int setup_fdt(struct kvm *kvm)
 
/* /chosen */
_FDT(fdt_begin_node(fdt, "chosen"));
-   _FDT(fdt_property_cell(fdt, "linux,pci-probe-only", 1));
_FDT(fdt_property_string(fdt, "bootargs", kvm->cfg.real_cmdline));
_FDT(fdt_property_u64(fdt, "kaslr-seed", kvm->cfg.arch.kaslr_seed));
 
-- 
1.9.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH kvmtool 12/16] ioport: Allow ioport callbacks to be called without locking

2019-03-07 Thread Julien Thierry
A vcpu might reconfigure some ioports while other vcpus access another
one. Currently, an edit to one port blocks all other ioport accesses.

Execute ioport callbacks outside of the lock, protecting the ioport data
with reference counting so that it cannot be deleted while a callback
runs.

Since the ioport struct is passed to all ioport callbacks, wrap it in
another structure that gets ref counted, which avoids having to modify
all ioport devices.
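
The lookup path then looks roughly like the sketch below (ref_get() is
assumed from the ref counting API introduced earlier in this series):

	br_read_lock(kvm);
	entry = ioport_search(&ioport_tree, port);
	if (entry)
		ref_get(&entry->ref_cnt);	/* keep the entry alive */
	br_read_unlock(kvm);

	if (entry) {
		/* the callback now runs without holding the lock */
		ret = entry->ioport.ops->io_in(&entry->ioport, vcpu, port, data, size);
		ref_put(&entry->ref_cnt, ioport_release);
	}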

Signed-off-by: Julien Thierry 
---
 include/kvm/ioport.h |  1 -
 ioport.c | 68 ++--
 2 files changed, 45 insertions(+), 24 deletions(-)

diff --git a/include/kvm/ioport.h b/include/kvm/ioport.h
index 8c86b71..a73f78c 100644
--- a/include/kvm/ioport.h
+++ b/include/kvm/ioport.h
@@ -18,7 +18,6 @@
 struct kvm;
 
 struct ioport {
-   struct rb_int_node  node;
struct ioport_operations*ops;
void*priv;
struct device_headerdev_hdr;
diff --git a/ioport.c b/ioport.c
index a72e403..2c3fe93 100644
--- a/ioport.c
+++ b/ioport.c
@@ -5,6 +5,7 @@
 #include "kvm/brlock.h"
 #include "kvm/rbtree-interval.h"
 #include "kvm/mutex.h"
+#include "kvm/ref_cnt.h"
 
 #include  /* for KVM_EXIT_* */
 #include 
@@ -14,11 +15,25 @@
 #include 
 #include 
 
-#define ioport_node(n) rb_entry(n, struct ioport, node)
+#define ioport_node(n) rb_entry(n, struct ioport_data, node)
 
 static struct rb_root  ioport_tree = RB_ROOT;
 
-static struct ioport *ioport_search(struct rb_root *root, u64 addr)
+struct ioport_data {
+   struct rb_int_node  node;
+   struct ioport   ioport;
+   struct ref_cnt  ref_cnt;
+};
+
+static void ioport_release(struct ref_cnt  *ref_cnt)
+{
+   struct ioport_data *data = container_of(ref_cnt,
+   struct ioport_data, ref_cnt);
+
+   free(data);
+}
+
+static struct ioport_data *ioport_search(struct rb_root *root, u64 addr)
 {
struct rb_int_node *node;
 
@@ -29,12 +44,12 @@ static struct ioport *ioport_search(struct rb_root *root, 
u64 addr)
return ioport_node(node);
 }
 
-static int ioport_insert(struct rb_root *root, struct ioport *data)
+static int ioport_insert(struct rb_root *root, struct ioport_data *data)
 {
	return rb_int_insert(root, &data->node);
 }
 
-static void ioport_remove(struct rb_root *root, struct ioport *data)
+static void ioport_remove(struct rb_root *root, struct ioport_data *data)
 {
	rb_int_erase(root, &data->node);
 }
@@ -65,7 +80,7 @@ static void generate_ioport_fdt_node(void *fdt,
 
 int ioport__register(struct kvm *kvm, u16 port, struct ioport_operations *ops, 
int count, void *param)
 {
-   struct ioport *entry;
+   struct ioport_data *entry;
int r;
 
br_write_lock(kvm);
@@ -74,14 +89,16 @@ int ioport__register(struct kvm *kvm, u16 port, struct 
ioport_operations *ops, i
if (entry) {
pr_warning("ioport re-registered: %x", port);
	rb_int_erase(&ioport_tree, &entry->node);
+   ref_put(&entry->ref_cnt, ioport_release);
}
 
entry = malloc(sizeof(*entry));
if (entry == NULL)
return -ENOMEM;
 
-   *entry = (struct ioport) {
-   .node   = RB_INT_INIT(port, port + count),
+   ref_cnt_init(&entry->ref_cnt);
+   entry->node = RB_INT_INIT(port, port + count);
+   entry->ioport = (struct ioport) {
.ops= ops,
.priv   = param,
.dev_hdr= (struct device_header) {
@@ -90,14 +107,15 @@ int ioport__register(struct kvm *kvm, u16 port, struct 
ioport_operations *ops, i
},
};
 
+   /* Give the ref to the tree */
	r = ioport_insert(&ioport_tree, entry);
if (r < 0) {
-   free(entry);
+   ref_put(&entry->ref_cnt, ioport_release);
br_write_unlock(kvm);
return r;
}
 
-   device__register(&entry->dev_hdr);
+   device__register(&entry->ioport.dev_hdr);
br_write_unlock(kvm);
 
return port;
@@ -105,7 +123,7 @@ int ioport__register(struct kvm *kvm, u16 port, struct 
ioport_operations *ops, i
 
 int ioport__unregister(struct kvm *kvm, u16 port)
 {
-   struct ioport *entry;
+   struct ioport_data *entry;
int r;
 
br_write_lock(kvm);
@@ -115,10 +133,9 @@ int ioport__unregister(struct kvm *kvm, u16 port)
if (!entry)
goto done;
 
-   device__unregister(&entry->dev_hdr);
+   device__unregister(&entry->ioport.dev_hdr);
	ioport_remove(&ioport_tree, entry);
-
-   free(entry);
+   ref_put(&entry->ref_cnt, ioport_release);
 
r = 0;
 
@@ -130,7 +147,7 @@ done:
 
 static void ioport__unregister_all(void)
 {
-   struct ioport *entry;
+   struct ioport_data *entry;
struct rb_node *rb;
struct rb_int

[PATCH kvmtool 14/16] virtio/pci: Make memory and IO BARs independent

2019-03-07 Thread Julien Thierry
Currently, the callbacks for the memory BAR (BAR[1]) sit on the IO BAR,
calling the IO port emulation. This means that BAR[1] needs COMMAND_IO to
be enabled whenever COMMAND_MEMORY is enabled.

Refactor the code so that both BARs are independent. Also, unify the
ioport/mmio callback arguments so that they all receive a virtio_device.
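
The mmio side then forwards into the same helpers as the ioport side,
along the lines of the following sketch (not the exact hunk):

	static void virtio_pci__io_mmio_callback(struct kvm_cpu *vcpu, u64 addr,
						 u8 *data, u32 len, u8 is_write,
						 void *ptr)
	{
		struct virtio_device *vdev = ptr;
		struct virtio_pci *vpci = vdev->virtio;

		/* offset within BAR[1], no detour through the IO port emulation */
		if (is_write)
			virtio_pci__data_out(vcpu, vdev, addr - vpci->mmio_addr, data, len);
		else
			virtio_pci__data_in(vcpu, vdev, addr - vpci->mmio_addr, data, len);
	}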

Signed-off-by: Julien Thierry 
---
 virtio/pci.c | 69 
 1 file changed, 46 insertions(+), 23 deletions(-)

diff --git a/virtio/pci.c b/virtio/pci.c
index 5a6c0d0..32f9824 100644
--- a/virtio/pci.c
+++ b/virtio/pci.c
@@ -77,7 +77,7 @@ static inline bool virtio_pci__msix_enabled(struct virtio_pci 
*vpci)
return vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_ENABLE);
 }
 
-static bool virtio_pci__specific_io_in(struct kvm *kvm, struct virtio_device 
*vdev, u16 port,
+static bool virtio_pci__specific_io_in(struct kvm *kvm, struct virtio_device 
*vdev,
void *data, int size, int offset)
 {
u32 config_offset;
@@ -107,21 +107,18 @@ static bool virtio_pci__specific_io_in(struct kvm *kvm, 
struct virtio_device *vd
return false;
 }
 
-static bool virtio_pci__io_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 
port, void *data, int size)
+static bool virtio_pci__data_in(struct kvm_cpu *vcpu, struct virtio_device 
*vdev,
+   unsigned long bar_offset, void *data, int size)
 {
-   unsigned long offset;
bool ret = true;
-   struct virtio_device *vdev;
struct virtio_pci *vpci;
struct kvm *kvm;
u32 val;
 
kvm = vcpu->kvm;
-   vdev = ioport->priv;
vpci = vdev->virtio;
-   offset = port - vpci->port_addr;
 
-   switch (offset) {
+   switch (bar_offset) {
case VIRTIO_PCI_HOST_FEATURES:
val = vdev->ops->get_host_features(kvm, vpci->dev);
ioport__write32(data, val);
@@ -143,13 +140,24 @@ static bool virtio_pci__io_in(struct ioport *ioport, 
struct kvm_cpu *vcpu, u16 p
vpci->isr = VIRTIO_IRQ_LOW;
break;
default:
-   ret = virtio_pci__specific_io_in(kvm, vdev, port, data, size, 
offset);
+   ret = virtio_pci__specific_io_in(kvm, vdev, data, size, 
bar_offset);
break;
};
 
return ret;
 }
 
+static bool virtio_pci__io_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 
port, void *data, int size)
+{
+   struct virtio_device *vdev;
+   struct virtio_pci *vpci;
+
+   vdev = ioport->priv;
+   vpci = vdev->virtio;
+
+   return virtio_pci__data_in(vcpu, vdev, port - vpci->port_addr, data, 
size);
+}
+
 static void update_msix_map(struct virtio_pci *vpci,
struct msix_table *msix_entry, u32 vecnum)
 {
@@ -174,7 +182,7 @@ static void update_msix_map(struct virtio_pci *vpci,
	irq__update_msix_route(vpci->kvm, gsi, &msix_entry->msg);
 }
 
-static bool virtio_pci__specific_io_out(struct kvm *kvm, struct virtio_device 
*vdev, u16 port,
+static bool virtio_pci__specific_io_out(struct kvm *kvm, struct virtio_device 
*vdev,
void *data, int size, int offset)
 {
struct virtio_pci *vpci = vdev->virtio;
@@ -248,21 +256,18 @@ static bool virtio_pci__specific_io_out(struct kvm *kvm, 
struct virtio_device *v
return false;
 }
 
-static bool virtio_pci__io_out(struct ioport *ioport, struct kvm_cpu *vcpu, 
u16 port, void *data, int size)
+static bool virtio_pci__data_out(struct kvm_cpu *vcpu, struct virtio_device 
*vdev,
+unsigned long bar_offset, void *data, int size)
 {
-   unsigned long offset;
bool ret = true;
-   struct virtio_device *vdev;
struct virtio_pci *vpci;
struct kvm *kvm;
u32 val;
 
kvm = vcpu->kvm;
-   vdev = ioport->priv;
vpci = vdev->virtio;
-   offset = port - vpci->port_addr;
 
-   switch (offset) {
+   switch (bar_offset) {
case VIRTIO_PCI_GUEST_FEATURES:
val = ioport__read32(data);
virtio_set_guest_features(kvm, vdev, vpci->dev, val);
@@ -289,13 +294,26 @@ static bool virtio_pci__io_out(struct ioport *ioport, 
struct kvm_cpu *vcpu, u16
vdev->ops->notify_status(kvm, vpci->dev, vpci->status);
break;
default:
-   ret = virtio_pci__specific_io_out(kvm, vdev, port, data, size, 
offset);
+   ret = virtio_pci__specific_io_out(kvm, vdev, data, size, 
bar_offset);
break;
};
 
return ret;
 }
 
+static bool virtio_pci__io_out(struct ioport *ioport, struct kvm_cpu *vcpu, 
u16 port, void *data, int size)
+{
+   unsigned long offset;
+   struct virtio_device *vdev;
+   struct virtio_pci *vpci;
+
+   vdev = ioport->priv;
+  

Re: [PATCH v5 22/26] KVM: arm64/sve: Add pseudo-register for the guest's vector lengths

2019-03-01 Thread Julien Thierry



On 26/02/2019 12:13, Dave Martin wrote:
> On Thu, Feb 21, 2019 at 05:48:59PM +0000, Julien Thierry wrote:
>> Hi Dave,
>>
>> On 18/02/2019 19:52, Dave Martin wrote:
>>> This patch adds a new pseudo-register KVM_REG_ARM64_SVE_VLS to
>>> allow userspace to set and query the set of vector lengths visible
>>> to the guest, along with corresponding storage in struct
>>> kvm_vcpu_arch.
>>>
>>> In the future, multiple register slices per SVE register may be
>>> visible through the ioctl interface.  Once the set of slices has
>>> been determined we would not be able to allow the vector length set
>>> to be changed any more, in order to avoid userspace seeing
>>> inconsistent sets of registers.  For this reason, this patch adds
>>> support to track vcpu finalization explicitly, and enforce proper
>>> sequencing of ioctls.
>>>
>>> The new pseudo-register is not exposed yet.  Subsequent patches
>>> will allow SVE to be turned on for guest vcpus, making it visible.
>>>
>>> Signed-off-by: Dave Martin 
>>>
>>> ---
>>>
>>> Changes since v4:
>>>
>>>  * Add a UAPI header comment indicating the pseudo-register status of
>>>KVM_REG_ARM64_SVE_VLS.
>>>
>>>  * Get rid of the sve_vqs[] array from struct kvm_vcpu_arch.  This
>>>array is pointless, because its contents must match the host's
>>>internal sve_vq_map anyway, up to vcpu->arch.sve_max_vl.
>>>
>>>The ioctl register accessors are slow-path code, so we can decode
>>>or reconstruct sve_vqs[] on demand instead, for exchange with
>>>userspace.
>>>
>>>  * For compatibility with potential future architecture extensions,
>>>enabling vector lengths above 256 bytes for the guest is explicitly
>>>disallowed now (because the code for exposing additional bits
>>>through ioctl is currently missing).  This can be addressed later
>>>if/when needed.
>>>
>>> Note:
>>>
>>>  * I defensively pass an array by pointer here, to help avoid
>>>accidentally breaking assumptions during future maintenance.
>>>
>>>Due to (over?)zealous constification, this causes the following
>>>sparse warning.  I think this is sparse getting confused: I am not
>>>relying on any kernel-specific semantics here, and GCC generates no
>>>warning.
>>>
>>> +arch/arm64/kvm/guest.c:33: warning: incorrect type in argument 1 
>>> (different modifiers)
>>> +arch/arm64/kvm/guest.c:33:expected unsigned long long const [usertype] 
>>> ( *const vqs )[8]
>>> +arch/arm64/kvm/guest.c:33:got unsigned long long [usertype] ( * )[8]
>>>
>>> ---
> 
> [...]
> 
>>> diff --git a/arch/arm64/include/uapi/asm/kvm.h 
>>> b/arch/arm64/include/uapi/asm/kvm.h
> 
> [...]
> 
>>> +static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg 
>>> *reg)
>>> +{
>>> +   unsigned int max_vq, vq;
>>> +   u64 vqs[DIV_ROUND_UP(SVE_VQ_MAX - SVE_VQ_MIN + 1, 64)];
>>> +
>>> +   if (WARN_ON(!sve_vl_valid(vcpu->arch.sve_max_vl)))
>>> +   return -EINVAL;
>>> +
>>> +   memset(vqs, 0, sizeof(vqs));
>>> +
>>> +   max_vq = sve_vq_from_vl(vcpu->arch.sve_max_vl);
>>> +   for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq)
>>> +   if (sve_vq_available(vq))
>>> +   vqs[vq_word(vq)] |= vq_mask(vq);
>>> +
>>> +   BUILD_BUG_ON(sizeof(vqs) != KVM_REG_SIZE(reg->id));
>>
>> reg->id is not know at build time. From my understanding of
>> BUILD_BUG_ON(), things actually ends up evaluated at runtime but I'm not
>> sure what happens when doing sizeof(char[1- 2*0]) at runtime.
>>
>> Anyway, I don't think this is intended.
> 
> There's no runtime check: BUILD_BUG_ON() will cause compilation to fail
> if the required condition doesn't fall out from optimisation.
> 
> Because of the way this function is called, reg->id is always
> KVM_REG_ARM64_SVE_VLS, so inlining and constant propagation will make
> this check pass (and compile to nothing).
> 
> We can assume a certain amount of inlining: the kernel officially can't
> be built without optimisation.  But the precise build configuration can
> sometimes have an effect here -- so it may not be better to rely on this
> working for this slow-path code.
> 
> I'll convert these to if (WARN_ON()) return -EIO or something, or drop
> t

Re: [PATCH v5 18/26] KVM: arm64/sve: Add SVE support to register access ioctl interface

2019-03-01 Thread Julien Thierry



On 26/02/2019 12:13, Dave Martin wrote:
> On Thu, Feb 21, 2019 at 03:23:37PM +0000, Julien Thierry wrote:
>> Hi Dave,
>>
>> On 18/02/2019 19:52, Dave Martin wrote:
>>> This patch adds the following registers for access via the
>>> KVM_{GET,SET}_ONE_REG interface:
>>>
>>>  * KVM_REG_ARM64_SVE_ZREG(n, i) (n = 0..31) (in 2048-bit slices)
>>>  * KVM_REG_ARM64_SVE_PREG(n, i) (n = 0..15) (in 256-bit slices)
>>>  * KVM_REG_ARM64_SVE_FFR(i) (in 256-bit slices)
>>>
>>> In order to adapt gracefully to future architectural extensions,
>>> the registers are logically divided up into slices as noted above:
>>> the i parameter denotes the slice index.
>>>
>>> This allows us to reserve space in the ABI for future expansion of
>>> these registers.  However, as of today the architecture does not
>>> permit registers to be larger than a single slice, so no code is
>>> needed in the kernel to expose additional slices, for now.  The
>>> code can be extended later as needed to expose them up to a maximum
>>> of 32 slices (as carved out in the architecture itself) if they
>>> really exist someday.
>>>
>>> The registers are only visible for vcpus that have SVE enabled.
>>> They are not enumerated by KVM_GET_REG_LIST on vcpus that do not
>>> have SVE.
>>>
>>> Accesses to the FPSIMD registers via KVM_REG_ARM_CORE is not
>>> allowed for SVE-enabled vcpus: SVE-aware userspace can use the
>>> KVM_REG_ARM64_SVE_ZREG() interface instead to access the same
>>> register state.  This avoids some complex and pointless emulation
>>> in the kernel to convert between the two views of these aliased
>>> registers.
>>>
>>> Signed-off-by: Dave Martin 
>>>
>>> ---
>>>
>>> Changes since v4:
>>>
>>>  * Add "BASE" #defines for the Z-reg and P-reg ranges in the KVM
>>>register ID space, to make the 0x400 magic number a little less
>>>cryptic.
>>>
>>>  * Pull KVM_SVE_{Z,P}REG_SIZE defines from "KVM: arm64: Enumerate SVE
>>>register indices for KVM_GET_REG_LIST", since we now use them here.
>>>
>>>  * Simplify sve_reg_region(), and give up on the attempt to make
>>>kreg_region a general thing: nothing else will use it for now,
>>>anyway, so let's keep it as simple as possible.
>>>
>>>  * Drop support for multiple slices per register.  This functionality
>>>can be added back in later if needed, without ABI breaks.
>>>
>>>  * Pull vcpu_sve_state_size() into kvm_host.h, from "KVM: arm64/sve:
>>>Allow userspace to enable SVE for vcpus".  This is needed for use
>>>with array_index_nospec() to determine the applicable buffer bounds.
>>>To avoid circular header deependency issues, the function is also
>>>converted into a macro, but is otherwise equivalent to the original
>>>version.
>>>
>>>  * Guard sve_state base offset in kernel memory with
>>>array_index_nospec(), since it is generated from user data that can
>>>put it out of range.
>>>
>>>(sve_state will get allocated with the corresponding size later in
>>>the series.  For now, this code is dormant since no means is
>>>provided for userspace to create SVE-enabled vcpus yet.)
>>> ---
>>>  arch/arm64/include/asm/kvm_host.h |  14 
>>>  arch/arm64/include/uapi/asm/kvm.h |  17 +
>>>  arch/arm64/kvm/guest.c| 138 
>>> ++
>>>  3 files changed, 157 insertions(+), 12 deletions(-)
>>>
>>
>> [...]
>>
>>> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
>>> index f491456..8cfa889 100644
>>> --- a/arch/arm64/kvm/guest.c
>>> +++ b/arch/arm64/kvm/guest.c
>>
>> [...]
>>
>>> @@ -211,6 +217,114 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const 
>>> struct kvm_one_reg *reg)
>>> return err;
>>>  }
>>>  
>>> +#define SVE_REG_SLICE_SHIFT0
>>> +#define SVE_REG_SLICE_BITS 5
>>> +#define SVE_REG_ID_SHIFT   (SVE_REG_SLICE_SHIFT + SVE_REG_SLICE_BITS)
>>> +#define SVE_REG_ID_BITS5
>>> +
>>> +#define SVE_REG_SLICE_MASK \
>>> +   GENMASK(SVE_REG_SLICE_SHIFT + SVE_REG_SLICE_BITS - 1,   \
>>> +   SVE_REG_SLICE_SHIFT)
>>> +#define SVE_REG_ID_MASK   

Re: [PATCH v5 06/26] arm64/sve: Check SVE virtualisability

2019-03-01 Thread Julien Thierry



On 26/02/2019 12:06, Dave Martin wrote:
> On Wed, Feb 20, 2019 at 11:12:49AM +0000, Julien Thierry wrote:
>> Hi Dave,
>>
>> On 18/02/2019 19:52, Dave Martin wrote:
>>> Due to the way the effective SVE vector length is controlled and
>>> trapped at different exception levels, certain mismatches in the
>>> sets of vector lengths supported by different physical CPUs in the
>>> system may prevent straightforward virtualisation of SVE at parity
>>> with the host.
>>>
>>> This patch analyses the extent to which SVE can be virtualised
>>> safely without interfering with migration of vcpus between physical
>>> CPUs, and rejects late secondary CPUs that would erode the
>>> situation further.
>>>
>>> It is left up to KVM to decide what to do with this information.
>>>
>>> Signed-off-by: Dave Martin 
>>> ---
>>>  arch/arm64/include/asm/fpsimd.h |  1 +
>>>  arch/arm64/kernel/cpufeature.c  |  2 +-
>>>  arch/arm64/kernel/fpsimd.c  | 86 
>>> ++---
>>>  3 files changed, 73 insertions(+), 16 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/fpsimd.h 
>>> b/arch/arm64/include/asm/fpsimd.h
>>> index dd1ad39..964adc9 100644
>>> --- a/arch/arm64/include/asm/fpsimd.h
>>> +++ b/arch/arm64/include/asm/fpsimd.h
>>> @@ -87,6 +87,7 @@ extern void sve_kernel_enable(const struct 
>>> arm64_cpu_capabilities *__unused);
>>>  extern u64 read_zcr_features(void);
>>>  
>>>  extern int __ro_after_init sve_max_vl;
>>> +extern int __ro_after_init sve_max_virtualisable_vl;
>>>  
>>>  #ifdef CONFIG_ARM64_SVE
>>>  
>>> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
>>> index f6d84e2..5eaacb4 100644
>>> --- a/arch/arm64/kernel/cpufeature.c
>>> +++ b/arch/arm64/kernel/cpufeature.c
>>> @@ -1825,7 +1825,7 @@ static void verify_sve_features(void)
>>> unsigned int len = zcr & ZCR_ELx_LEN_MASK;
>>>  
>>> if (len < safe_len || sve_verify_vq_map()) {
>>> -   pr_crit("CPU%d: SVE: required vector length(s) missing\n",
>>> +   pr_crit("CPU%d: SVE: vector length support mismatch\n",
>>> smp_processor_id());
>>> cpu_die_early();
>>> }
>>> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
>>> index 62c37f0..64729e2 100644
>>> --- a/arch/arm64/kernel/fpsimd.c
>>> +++ b/arch/arm64/kernel/fpsimd.c
>>> @@ -18,6 +18,7 @@
>>>   */
>>>  
>>>  #include 
>>> +#include 
>>>  #include 
>>>  #include 
>>>  #include 
>>> @@ -48,6 +49,7 @@
>>>  #include 
>>>  #include 
>>>  #include 
>>> +#include 
>>>  
>>>  #define FPEXC_IOF  (1 << 0)
>>>  #define FPEXC_DZF  (1 << 1)
>>> @@ -130,14 +132,18 @@ static int sve_default_vl = -1;
>>>  
>>>  /* Maximum supported vector length across all CPUs (initially poisoned) */
>>>  int __ro_after_init sve_max_vl = SVE_VL_MIN;
>>> +int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN;
>>>  /* Set of available vector lengths, as vq_to_bit(vq): */
>>>  static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
>>> +/* Set of vector lengths present on at least one cpu: */
>>> +static __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
>>>  static void __percpu *efi_sve_state;
>>>  
>>>  #else /* ! CONFIG_ARM64_SVE */
>>>  
>>>  /* Dummy declaration for code that will be optimised out: */
>>>  extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
>>> +extern __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX);
>>>  extern void __percpu *efi_sve_state;
>>>  
>>>  #endif /* ! CONFIG_ARM64_SVE */
>>> @@ -623,12 +629,6 @@ int sve_get_current_vl(void)
>>> return sve_prctl_status(0);
>>>  }
>>>  
>>> -/*
>>> - * Bitmap for temporary storage of the per-CPU set of supported vector 
>>> lengths
>>> - * during secondary boot.
>>> - */
>>> -static DECLARE_BITMAP(sve_secondary_vq_map, SVE_VQ_MAX);
>>> -
>>>  static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
>>>  {
>>> unsigned int vq, vl;
>>> @@ -650,6 +650,7 @@ static void sve_probe_vqs(DECLARE_BITMAP(map, 
>>> SVE_

Re: [PATCH v5 24/26] KVM: arm64: Add a capabillity to advertise SVE support

2019-02-22 Thread Julien Thierry
Hi Dave,

On 18/02/2019 19:52, Dave Martin wrote:
> To provide a uniform way to check for KVM SVE support amongst other
> features, this patch adds a suitable capability KVM_CAP_ARM_SVE,
> and reports it as present when SVE is available.
> 
> Signed-off-by: Dave Martin 
> ---
>  arch/arm64/kvm/reset.c   | 8 
>  include/uapi/linux/kvm.h | 1 +
>  2 files changed, 9 insertions(+)
> 
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index e67cd2e..f636b34 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -35,6 +35,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  /* Maximum phys_shift supported for any VM on this host */
>  static u32 kvm_ipa_limit;
> @@ -93,6 +94,9 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long 
> ext)
>   case KVM_CAP_ARM_VM_IPA_SIZE:
>   r = kvm_ipa_limit;
>   break;
> + case KVM_CAP_ARM_SVE:
> + r = system_supports_sve();
> + break;
>   default:
>   r = 0;
>   }
> @@ -105,6 +109,10 @@ static int kvm_reset_sve(struct kvm_vcpu *vcpu)
>   if (!system_supports_sve())
>   return -EINVAL;
>  
> + /* Verify that KVM startup enforced this when SVE was detected: */
> + if (WARN_ON(!has_vhe()))
> + return -EINVAL;

I'm wondering, wouldn't it make more sense to check for this when
userland tries to set KVM_ARM_VCPU_SVE?
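
Something along the lines of (untested, when the KVM_ARM_VCPU_SVE feature
bit is requested through KVM_ARM_VCPU_INIT):

	if (test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features) &&
	    (!system_supports_sve() || !has_vhe()))
		return -EINVAL;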

Otherwise:

Reviewed-by: Julien Thierry 

Cheers,

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 23/26] KVM: arm64/sve: Allow userspace to enable SVE for vcpus

2019-02-22 Thread Julien Thierry
Hi Dave,

On 18/02/2019 19:52, Dave Martin wrote:
> Now that all the pieces are in place, this patch offers a new flag
> KVM_ARM_VCPU_SVE that userspace can pass to KVM_ARM_VCPU_INIT to
> turn on SVE for the guest, on a per-vcpu basis.
> 
> As part of this, support for initialisation and reset of the SVE
> vector length set and registers is added in the appropriate places.
> Allocation SVE registers is deferred until kvm_arm_vcpu_finalize(),
> by which time the size of the registers is known.
> 
> Setting the vector lengths supported by the vcpu is considered
> configuration of the emulated hardware rather than runtime
> configuration, so no support is offered for changing the vector
> lengths of an existing vcpu across reset.
> 
> Signed-off-by: Dave Martin 
> 
> ---
> 
> Changes since v4:
> 
>  * Pull out vcpu_sve_state_size(), for use earlier in the series.
> 
>  * Remove unnecessary vcpu->arch.sve_vqs[], and clamp maximum guest
>vector length to 256 bytes for forwards compatibility.
> 
>(See "KVM: arm64/sve: Add pseudo-register for the guest's vector
>lengths".)
> 
>  * Minor tidyups to make some checks less verbose.
> ---
>  arch/arm64/include/asm/kvm_host.h |  2 +-
>  arch/arm64/include/uapi/asm/kvm.h |  1 +
>  arch/arm64/kvm/reset.c| 70 
> ++-
>  3 files changed, 71 insertions(+), 2 deletions(-)
> 

[...]

> diff --git a/arch/arm64/include/uapi/asm/kvm.h 
> b/arch/arm64/include/uapi/asm/kvm.h
> index 7ff1bd4..6963b7e 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -102,6 +102,7 @@ struct kvm_regs {
>  #define KVM_ARM_VCPU_EL1_32BIT   1 /* CPU running a 32bit VM */
>  #define KVM_ARM_VCPU_PSCI_0_22 /* CPU uses PSCI v0.2 */
>  #define KVM_ARM_VCPU_PMU_V3  3 /* Support guest PMUv3 */
> +#define KVM_ARM_VCPU_SVE 4 /* enable SVE for this CPU */
>  
>  struct kvm_vcpu_init {
>   __u32 target;
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index 1379fb2..e67cd2e 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c

[...]

> @@ -98,11 +100,69 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, 
> long ext)
>   return r;
>  }
>  
> +static int kvm_reset_sve(struct kvm_vcpu *vcpu)
> +{
> + if (!system_supports_sve())
> + return -EINVAL;
> +
> + /* If resetting an already-configured vcpu, just zero the SVE regs: */
> + if (vcpu->arch.sve_state) {
> + size_t size = vcpu_sve_state_size(vcpu);
> +
> + if (!size || WARN_ON(!vcpu_has_sve(vcpu)))
> + return -EINVAL;
> +
> + memset(vcpu->arch.sve_state, 0, size);
> + return 0;
> + }
> +
> + if (WARN_ON(!sve_vl_valid(sve_max_vl)))
> + return -EINVAL;
> +
> + /* If the full set of host vector lengths cannot be used, give up: */
> + if (sve_max_virtualisable_vl < sve_max_vl)
> + return -EINVAL;
> +
> + /* Default to the set of vector lengths supported by the host */
> + vcpu->arch.sve_max_vl = sve_max_vl;
> +
> + /*
> +  * The get_sve_reg()/set_sve_reg() ioctl interface will need
> +  * to be extended with multiple register slice support in
> +  * order to support vector lengths greater than
> +  * SVE_VL_ARCH_MAX:
> +  */
> + if (WARN_ONCE(vcpu->arch.sve_max_vl > SVE_VL_ARCH_MAX,
> +   "KVM: SVE vector length for guests limited to %d bytes\n",
> +   SVE_VL_ARCH_MAX))
> + vcpu->arch.sve_max_vl = SVE_VL_ARCH_MAX;
> +
> + /*
> +  * Userspace can still customize the vector lengths by writing
> +  * KVM_REG_ARM64_SVE_VLS.  Allocation is deferred until
> +  * kvm_arm_vcpu_finalize(), which freezes the configuration.
> +  */
> + vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;
> +
> + return 0;
> +}
> +
>  int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu)
>  {
>   if (likely(kvm_arm_vcpu_finalized(vcpu)))
>   return 0;
>  
> + if (vcpu_has_sve(vcpu)) {
> + size_t size = vcpu_sve_state_size(vcpu);
> +
> + if (!size)
> + return -EINVAL;
> +
> + vcpu->arch.sve_state = kzalloc(size, GFP_KERNEL);

We should probably free this in kvm_arch_vcpu_free().
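
i.e. something like the following in kvm_arch_vcpu_free() (untested):

	kfree(vcpu->arch.sve_state);
	vcpu->arch.sve_state = NULL;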

Cheers,

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH -next] KVM: arm/arm64: Remove unused timer variable

2019-02-22 Thread Julien Thierry
Hi Shaokun,

On 22/02/2019 06:34, Shaokun Zhang wrote:
> The 'timer' local variable became unused after commit bee038a67487
> ("KVM: arm/arm64: Rework the timer code to use a timer_map").
> Remove it to avoid [-Wunused-but-set-variable] warning.
> 
> Cc: Christoffer Dall 
> Cc: Marc Zyngier 
> Cc: James Morse 
> Cc: Julien Thierry 
> Cc: Suzuki K Pouloze 
> Signed-off-by: Shaokun Zhang 
> ---
>  virt/kvm/arm/arch_timer.c | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
> index af8f2f1d01cc..3417f2dbc366 100644
> --- a/virt/kvm/arm/arch_timer.c
> +++ b/virt/kvm/arm/arch_timer.c
> @@ -236,14 +236,12 @@ static enum hrtimer_restart kvm_hrtimer_expire(struct 
> hrtimer *hrt)
>  
>  static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
>  {
> - struct arch_timer_cpu *timer;
>   enum kvm_arch_timers index;
>   u64 cval, now;
>  
>   if (!timer_ctx)
>   return false;
>  
> - timer = vcpu_timer(timer_ctx->vcpu);
>   index = arch_timer_ctx_index(timer_ctx);
>  
>   if (timer_ctx->loaded) {

This seems to be a consequence of moving the loaded state from
arch_timer_cpu to arch_timer_context. Thanks for fixing it.

Reviewed-by: Julien Thierry 

Cheers,

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 22/26] KVM: arm64/sve: Add pseudo-register for the guest's vector lengths

2019-02-21 Thread Julien Thierry
;
> + int ret;
>   const char __user *uptr = (const char __user *)reg->addr;
>  
> - if (!vcpu_has_sve(vcpu) || sve_reg_region(, vcpu, reg))
> + if (!vcpu_has_sve(vcpu))
> + return -ENOENT;
> +
> + if (reg->id == KVM_REG_ARM64_SVE_VLS)
> + return set_sve_vls(vcpu, reg);
> +
> + /* Finalize the number of slices per SVE register: */
> + ret = kvm_arm_vcpu_finalize(vcpu);
> + if (ret)
> + return ret;
> +
> + if (sve_reg_region(&region, vcpu, reg))
>   return -ENOENT;
>  
>   if (copy_from_user(vcpu->arch.sve_state + region.koffset, uptr,
> @@ -452,30 +551,43 @@ static unsigned long num_sve_regs(const struct kvm_vcpu 
> *vcpu)
>   if (!vcpu_has_sve(vcpu))
>   return 0;
>  
> - return slices * (SVE_NUM_PREGS + SVE_NUM_ZREGS + 1 /* FFR */);
> + return slices * (SVE_NUM_PREGS + SVE_NUM_ZREGS + 1 /* FFR */)
> + + 1; /* KVM_REG_ARM64_SVE_VLS */
>  }
>  
>  static int copy_sve_reg_indices(const struct kvm_vcpu *vcpu, u64 __user 
> **uind)
>  {
>   /* Only the first slice ever exists, for now */
>   const unsigned int slices = 1;
> + u64 reg;
>   unsigned int i, n;
>  
>   if (!vcpu_has_sve(vcpu))
>   return 0;
>  
> + /*
> +  * Enumerate this first, so that userspace can save/restore in
> +  * the order reported by KVM_GET_REG_LIST:
> +  */
> + reg = KVM_REG_ARM64_SVE_VLS;
> + if (put_user(reg, (*uind)++))
> + return -EFAULT;
> +
>   for (i = 0; i < slices; i++) {
>   for (n = 0; n < SVE_NUM_ZREGS; n++) {
> - if (put_user(KVM_REG_ARM64_SVE_ZREG(n, i), (*uind)++))
> + reg = KVM_REG_ARM64_SVE_ZREG(n, i);
> + if (put_user(reg, (*uind)++))
>   return -EFAULT;
>   }
>  
>   for (n = 0; n < SVE_NUM_PREGS; n++) {
> - if (put_user(KVM_REG_ARM64_SVE_PREG(n, i), (*uind)++))
> + reg = KVM_REG_ARM64_SVE_PREG(n, i);
> + if (put_user(reg, (*uind)++))
>   return -EFAULT;
>   }
>  
> - if (put_user(KVM_REG_ARM64_SVE_FFR(i), (*uind)++))
> + reg = KVM_REG_ARM64_SVE_FFR(i);
> + if (put_user(reg, (*uind)++))
>   return -EFAULT;
>   }
>  
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index b72a3dd..1379fb2 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -98,6 +98,15 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, 
> long ext)
>   return r;
>  }
>  
> +int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu)
> +{
> + if (likely(kvm_arm_vcpu_finalized(vcpu)))
> + return 0;
> +
> + vcpu->arch.flags |= KVM_ARM64_VCPU_FINALIZED;
> + return 0;
> +}
> +

I think that the introduction of this flag and setting it should be part
of the previous patch.

Cheers,

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 19/26] KVM: arm64: Enumerate SVE register indices for KVM_GET_REG_LIST

2019-02-21 Thread Julien Thierry



On 18/02/2019 19:52, Dave Martin wrote:
> This patch includes the SVE register IDs in the list returned by
> KVM_GET_REG_LIST, as appropriate.
> 
> On a non-SVE-enabled vcpu, no new IDs are added.
> 
> On an SVE-enabled vcpu, IDs for the FPSIMD V-registers are removed
> from the list, since userspace is required to access the Z-
> registers instead to access their context.  For the variably-sized
> SVE registers, the appropriate set of slice IDs are enumerated,
> depending on the maximum vector length for the vcpu.
> 
> Signed-off-by: Dave Martin 

Reviewed-by: Julien Thierry 

> 
> ---
> 
> Changes since v4:
> 
>  * Drop KVM_SVE_SLICES(), which is no longer used due to the dropping of
>register multi-slice support from the series.
> 
>  * Drop register multi-slice support.
> ---
>  arch/arm64/kvm/guest.c | 51 
> ++
>  1 file changed, 51 insertions(+)
> 
> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
> index 8cfa889..4a2ad60 100644
> --- a/arch/arm64/kvm/guest.c
> +++ b/arch/arm64/kvm/guest.c
> @@ -366,6 +366,14 @@ static int copy_core_reg_indices(const struct kvm_vcpu 
> *vcpu,
>   continue;
>   }
>  
> + /*
> +  * The KVM_REG_ARM64_SVE regs must be used instead of
> +  * KVM_REG_ARM_CORE for accessing the FPSIMD V-registers on
> +  * SVE-enabled vcpus:
> +  */
> + if (vcpu_has_sve(vcpu) && core_reg_offset_is_vreg(i))
> + continue;
> +
>   if (uind) {
>   if (put_user(reg, *uind))
>   return -EFAULT;
> @@ -436,6 +444,44 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const 
> struct kvm_one_reg *reg)
>   return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
>  }
>  
> +static unsigned long num_sve_regs(const struct kvm_vcpu *vcpu)
> +{
> + /* Only the first slice ever exists, for now */
> + const unsigned int slices = 1;
> +
> + if (!vcpu_has_sve(vcpu))
> + return 0;
> +
> + return slices * (SVE_NUM_PREGS + SVE_NUM_ZREGS + 1 /* FFR */);
> +}
> +
> +static int copy_sve_reg_indices(const struct kvm_vcpu *vcpu, u64 __user 
> **uind)
> +{
> + /* Only the first slice ever exists, for now */
> + const unsigned int slices = 1;
> + unsigned int i, n;
> +
> + if (!vcpu_has_sve(vcpu))
> + return 0;
> +
> + for (i = 0; i < slices; i++) {
> + for (n = 0; n < SVE_NUM_ZREGS; n++) {
> + if (put_user(KVM_REG_ARM64_SVE_ZREG(n, i), (*uind)++))
> + return -EFAULT;
> + }
> +
> + for (n = 0; n < SVE_NUM_PREGS; n++) {
> + if (put_user(KVM_REG_ARM64_SVE_PREG(n, i), (*uind)++))
> + return -EFAULT;
> + }
> +
> + if (put_user(KVM_REG_ARM64_SVE_FFR(i), (*uind)++))
> + return -EFAULT;
> + }
> +
> + return 0;
> +}
> +
>  /**
>   * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
>   *
> @@ -446,6 +492,7 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
>   unsigned long res = 0;
>  
>   res += num_core_regs(vcpu);
> + res += num_sve_regs(vcpu);
>   res += kvm_arm_num_sys_reg_descs(vcpu);
>   res += kvm_arm_get_fw_num_regs(vcpu);
>   res += NUM_TIMER_REGS;
> @@ -466,6 +513,10 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 
> __user *uindices)
>   if (ret < 0)
>   return ret;
>  
> + ret = copy_sve_reg_indices(vcpu, &uindices);
> + if (ret)
> + return ret;
> +
>   ret = kvm_arm_copy_fw_reg_indices(vcpu, uindices);
>   if (ret)
>   return ret;
> 

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 18/26] KVM: arm64/sve: Add SVE support to register access ioctl interface

2019-02-21 Thread Julien Thierry
g in user memory */
> +};
> +
> +/* Get sanitised bounds for user/kernel SVE register copy */
> +static int sve_reg_region(struct sve_state_region *region,

I feel that sve_reg_to_region or sve_reg_get_region would be a clearer name.

Cheers,

Julien

> +   struct kvm_vcpu *vcpu,
> +   const struct kvm_one_reg *reg)
> +{
> + /* reg ID ranges for Z- registers */
> + const u64 zreg_id_min = KVM_REG_ARM64_SVE_ZREG(0, 0);
> + const u64 zreg_id_max = KVM_REG_ARM64_SVE_ZREG(SVE_NUM_ZREGS - 1,
> +SVE_NUM_SLICES - 1);
> +
> + /* reg ID ranges for P- registers and FFR (which are contiguous) */
> + const u64 preg_id_min = KVM_REG_ARM64_SVE_PREG(0, 0);
> + const u64 preg_id_max = KVM_REG_ARM64_SVE_FFR(SVE_NUM_SLICES - 1);
> +
> + unsigned int vq;
> + unsigned int reg_num;
> +
> + unsigned int reqoffset, reqlen; /* User-requested offset and length */
> + unsigned int maxlen; /* Maxmimum permitted length */
> +
> + size_t sve_state_size;
> +
> + /* Only the first slice ever exists, for now: */
> + if ((reg->id & SVE_REG_SLICE_MASK) != 0)
> + return -ENOENT;
> +
> + vq = sve_vq_from_vl(vcpu->arch.sve_max_vl);
> +
> + reg_num = (reg->id & SVE_REG_ID_MASK) >> SVE_REG_ID_SHIFT;
> +
> + if (reg->id >= zreg_id_min && reg->id <= zreg_id_max) {
> + reqoffset = SVE_SIG_ZREG_OFFSET(vq, reg_num) -
> + SVE_SIG_REGS_OFFSET;
> + reqlen = KVM_SVE_ZREG_SIZE;
> + maxlen = SVE_SIG_ZREG_SIZE(vq);
> + } else if (reg->id >= preg_id_min && reg->id <= preg_id_max) {
> + reqoffset = SVE_SIG_PREG_OFFSET(vq, reg_num) -
> + SVE_SIG_REGS_OFFSET;
> + reqlen = KVM_SVE_PREG_SIZE;
> + maxlen = SVE_SIG_PREG_SIZE(vq);
> + } else {
> + return -ENOENT;
> + }
> +
> + sve_state_size = vcpu_sve_state_size(vcpu);
> + if (!sve_state_size)
> + return -EINVAL;
> +
> + region->koffset = array_index_nospec(reqoffset, sve_state_size);
> + region->klen = min(maxlen, reqlen);
> + region->upad = reqlen - region->klen;
> +
> + return 0;
> +}
> +
> +static int get_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
> +{
> + struct sve_state_region region;
> + char __user *uptr = (char __user *)reg->addr;
> +
> + if (!vcpu_has_sve(vcpu) || sve_reg_region(&region, vcpu, reg))
> + return -ENOENT;
> +
> + if (copy_to_user(uptr, vcpu->arch.sve_state + region.koffset,
> +  region.klen) ||
> + clear_user(uptr + region.klen, region.upad))
> + return -EFAULT;
> +
> + return 0;
> +}
> +
> +static int set_sve_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
> +{
> + struct sve_state_region region;
> + const char __user *uptr = (const char __user *)reg->addr;
> +
> + if (!vcpu_has_sve(vcpu) || sve_reg_region(&region, vcpu, reg))
> + return -ENOENT;
> +
> + if (copy_from_user(vcpu->arch.sve_state + region.koffset, uptr,
> +region.klen))
> + return -EFAULT;
> +
> + return 0;
> +}
> +
>  int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs 
> *regs)
>  {
>   return -EINVAL;
> @@ -371,12 +485,12 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct 
> kvm_one_reg *reg)
>   if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
>   return -EINVAL;
>  
> - /* Register group 16 means we want a core register. */
> - if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
> - return get_core_reg(vcpu, reg);
> -
> - if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_FW)
> - return kvm_arm_get_fw_reg(vcpu, reg);
> + switch (reg->id & KVM_REG_ARM_COPROC_MASK) {
> + case KVM_REG_ARM_CORE:  return get_core_reg(vcpu, reg);
> + case KVM_REG_ARM_FW:return kvm_arm_get_fw_reg(vcpu, reg);
> + case KVM_REG_ARM64_SVE: return get_sve_reg(vcpu, reg);
> + default: break; /* fall through */
> + }
>  
>   if (is_timer_reg(reg->id))
>   return get_timer_reg(vcpu, reg);
> @@ -390,12 +504,12 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct 
> kvm_one_reg *reg)
>       if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
>   return -EINVAL;
>  
> - /*

Re: [PATCH v5 17/26] KVM: arm64: Reject ioctl access to FPSIMD V-regs on SVE vcpus

2019-02-21 Thread Julien Thierry



On 18/02/2019 19:52, Dave Martin wrote:
> In order to avoid the pointless complexity of maintaining two ioctl
> register access views of the same data, this patch blocks ioctl
> access to the FPSIMD V-registers on vcpus that support SVE.
> 
> This will make it more straightforward to add SVE register access
> support.
> 
> Since SVE is an opt-in feature for userspace, this will not affect
> existing users.
> 
> Signed-off-by: Dave Martin 

Reviewed-by: Julien Thierry 

> ---
>  arch/arm64/kvm/guest.c | 38 +++---
>  1 file changed, 27 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
> index f83fe22..f491456 100644
> --- a/arch/arm64/kvm/guest.c
> +++ b/arch/arm64/kvm/guest.c
> @@ -95,7 +95,14 @@ static int core_reg_size_from_offset(u64 off)
>   return -EINVAL;
>  }
>  
> -static int validate_core_offset(const struct kvm_one_reg *reg)
> +static bool core_reg_offset_is_vreg(u64 off)
> +{
> + return off >= KVM_REG_ARM_CORE_REG(fp_regs.vregs) &&
> + off < KVM_REG_ARM_CORE_REG(fp_regs.fpsr);
> +}
> +
> +static int validate_core_offset(const struct kvm_vcpu *vcpu,
> + const struct kvm_one_reg *reg)
>  {
>   u64 off = core_reg_offset_from_id(reg->id);
>   int size = core_reg_size_from_offset(off);
> @@ -103,10 +110,18 @@ static int validate_core_offset(const struct 
> kvm_one_reg *reg)
>   if (size < 0)
>   return -EINVAL;
>  
> - if (KVM_REG_SIZE(reg->id) == size)
> - return 0;
> + if (KVM_REG_SIZE(reg->id) != size)
> + return -EINVAL;
>  
> - return -EINVAL;
> + /*
> +  * The KVM_REG_ARM64_SVE regs must be used instead of
> +  * KVM_REG_ARM_CORE for accessing the FPSIMD V-registers on
> +  * SVE-enabled vcpus:
> +  */
> + if (vcpu_has_sve(vcpu) && core_reg_offset_is_vreg(off))
> + return -EINVAL;
> +
> + return 0;
>  }
>  
>  static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
> @@ -128,7 +143,7 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const 
> struct kvm_one_reg *reg)
>   (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
>   return -ENOENT;
>  
> - if (validate_core_offset(reg))
> + if (validate_core_offset(vcpu, reg))
>   return -EINVAL;
>  
>   if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id)))
> @@ -153,7 +168,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const 
> struct kvm_one_reg *reg)
>   (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
>   return -ENOENT;
>  
> - if (validate_core_offset(reg))
> + if (validate_core_offset(vcpu, reg))
>   return -EINVAL;
>  
>   if (KVM_REG_SIZE(reg->id) > sizeof(tmp))
> @@ -206,7 +221,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, 
> struct kvm_regs *regs)
>   return -EINVAL;
>  }
>  
> -static int copy_core_reg_indices(u64 __user **uind)
> +static int copy_core_reg_indices(const struct kvm_vcpu *vcpu,
> +  u64 __user **uind)
>  {
>   unsigned int i;
>   int n = 0;
> @@ -248,9 +264,9 @@ static int copy_core_reg_indices(u64 __user **uind)
>   return n;
>  }
>  
> -static unsigned long num_core_regs(void)
> +static unsigned long num_core_regs(const struct kvm_vcpu *vcpu)
>  {
> - return copy_core_reg_indices(NULL);
> + return copy_core_reg_indices(vcpu, NULL);
>  }
>  
>  /**
> @@ -315,7 +331,7 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
>  {
>   unsigned long res = 0;
>  
> - res += num_core_regs();
> + res += num_core_regs(vcpu);
>   res += kvm_arm_num_sys_reg_descs(vcpu);
>   res += kvm_arm_get_fw_num_regs(vcpu);
>   res += NUM_TIMER_REGS;
> @@ -332,7 +348,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 
> __user *uindices)
>  {
>   int ret;
>  
> - ret = copy_core_reg_indices(&uindices);
> + ret = copy_core_reg_indices(vcpu, &uindices);
>   if (ret < 0)
>   return ret;
>  
> 

-- 
Julien Thierry
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 13/26] KVM: arm64/sve: System register context switch and access support

2019-02-20 Thread Julien Thierry



On 18/02/2019 19:52, Dave Martin wrote:
> This patch adds the necessary support for context switching ZCR_EL1
> for each vcpu.
> 
> ZCR_EL1 is trapped alongside the FPSIMD/SVE registers, so it makes
> sense for it to be handled as part of the guest FPSIMD/SVE context
> for context switch purposes instead of handling it as a general
> system register.  This means that it can be switched in lazily at
> the appropriate time.  No effort is made to track host context for
> this register, since SVE requires VHE: thus the hosts's value for
> this register lives permanently in ZCR_EL2 and does not alias the
> guest's value at any time.
> 
> The Hyp switch and fpsimd context handling code is extended
> appropriately.
> 
> Accessors are added in sys_regs.c to expose the SVE system
> registers and ID register fields.  Because these need to be
> conditionally visible based on the guest configuration, they are
> implemented separately for now rather than by use of the generic
> system register helpers.  This may be abstracted better later on
> when/if there are more features requiring this model.
> 
> ID_AA64ZFR0_EL1 is RO-RAZ for MRS/MSR when SVE is disabled for the
> guest, but for compatibility with non-SVE aware KVM implementations
> the register should not be enumerated at all for KVM_GET_REG_LIST
> in this case.  For consistency we also reject ioctl access to the
> register.  This ensures that a non-SVE-enabled guest looks the same
> to userspace, irrespective of whether the kernel KVM implementation
> supports SVE.
> 
> Signed-off-by: Dave Martin 
> 

Reviewed-by: Julien Thierry 

> ---
> 
> Changes since v4:
> 
>  * Remove annoying linebreak in assignment.
> 
>  * Remove #ifdef clutter and migrate to using the new sysreg
>restrictions() framework to control register visibility.
> ---
>  arch/arm64/include/asm/kvm_host.h |  1 +
>  arch/arm64/include/asm/sysreg.h   |  3 ++
>  arch/arm64/kvm/fpsimd.c   |  9 +++-
>  arch/arm64/kvm/hyp/switch.c   |  3 ++
>  arch/arm64/kvm/sys_regs.c | 97 
> +--
>  5 files changed, 107 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index af625a8..c32f195 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -112,6 +112,7 @@ enum vcpu_sysreg {
>   SCTLR_EL1,  /* System Control Register */
>   ACTLR_EL1,  /* Auxiliary Control Register */
>   CPACR_EL1,  /* Coprocessor Access Control */
> + ZCR_EL1,/* SVE Control */
>   TTBR0_EL1,  /* Translation Table Base Register 0 */
>   TTBR1_EL1,  /* Translation Table Base Register 1 */
>   TCR_EL1,/* Translation Control Register */
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 72dc4c0..da38491 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -449,6 +449,9 @@
>  #define SYS_ICH_LR14_EL2 __SYS__LR8_EL2(6)
>  #define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7)
>  
> +/* VHE encodings for architectural EL0/1 system registers */
> +#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0)
> +
>  /* Common SCTLR_ELx flags. */
>  #define SCTLR_ELx_DSSBS  (_BITUL(44))
>  #define SCTLR_ELx_ENIA   (_BITUL(31))
> diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
> index 1cf4f02..7053bf4 100644
> --- a/arch/arm64/kvm/fpsimd.c
> +++ b/arch/arm64/kvm/fpsimd.c
> @@ -103,14 +103,21 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
>  void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
>  {
>   unsigned long flags;
> + bool host_has_sve = system_supports_sve();
> + bool guest_has_sve = vcpu_has_sve(vcpu);
>  
>   local_irq_save(flags);
>  
>   if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
> + u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1];
> +
>   /* Clean guest FP state to memory and invalidate cpu view */
>   fpsimd_save();
>   fpsimd_flush_cpu_state();
> - } else if (system_supports_sve()) {
> +
> + if (guest_has_sve)
> + *guest_zcr = read_sysreg_s(SYS_ZCR_EL12);
> + } else if (host_has_sve) {
>   /*
>* The FPSIMD/SVE state in the CPU has not been touched, and we
>* have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index b0b1478..9f07403 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -347,6 +347,9 @@ s

Re: [PATCH v5 14/26] KVM: arm64/sve: Context switch the SVE registers

2019-02-20 Thread Julien Thierry



On 18/02/2019 19:52, Dave Martin wrote:
> In order to give each vcpu its own view of the SVE registers, this
> patch adds context storage via a new sve_state pointer in struct
> vcpu_arch.  An additional member sve_max_vl is also added for each
> vcpu, to determine the maximum vector length visible to the guest
> and thus the value to be configured in ZCR_EL2.LEN while the is

"While the  is active"?

> active.  This also determines the layout and size of the storage in
> sve_state, which is read and written by the same backend functions
> that are used for context-switching the SVE state for host tasks.
> 
> On SVE-enabled vcpus, SVE access traps are now handled by switching
> in the vcpu's SVE context and disabling the trap before returning
> to the guest.  On other vcpus, the trap is not handled and an exit
> back to the host occurs, where the handle_sve() fallback path
> reflects an undefined instruction exception back to the guest,
> consistently with the behaviour of non-SVE-capable hardware (as was
> done unconditionally prior to this patch).
> 
> No SVE handling is added on non-VHE-only paths, since VHE is an
> architectural and Kconfig prerequisite of SVE.
> 
> Signed-off-by: Dave Martin 
> 

Otherwise:

Reviewed-by: Julien Thierry 

> ---
> 
> Changes since v4:
> 
>  * Remove if_sve() helper in favour of open-coded static key checks.
> 
>  * Explicitly merge static key checks and other condition checks to
>reduce overhead and maximise const-folding and specialisation
>opportunities in the compiler.
> ---
>  arch/arm64/include/asm/kvm_host.h |  6 
>  arch/arm64/kvm/fpsimd.c   |  5 +--
>  arch/arm64/kvm/hyp/switch.c   | 70 
> +--
>  3 files changed, 61 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index c32f195..77b6f3e 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -212,6 +212,8 @@ typedef struct kvm_cpu_context kvm_cpu_context_t;
>  
>  struct kvm_vcpu_arch {
>   struct kvm_cpu_context ctxt;
> + void *sve_state;
> + unsigned int sve_max_vl;
>  
>   /* HYP configuration */
>   u64 hcr_el2;
> @@ -304,6 +306,10 @@ struct kvm_vcpu_arch {
>   bool sysregs_loaded_on_cpu;
>  };
>  
> +/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
> +#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
> +   sve_ffr_offset((vcpu)->arch.sve_max_vl)))
> +
>  /* vcpu_arch flags field values: */
>  #define KVM_ARM64_DEBUG_DIRTY(1 << 0)
>  #define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */
> diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
> index 7053bf4..6e3c9c8 100644
> --- a/arch/arm64/kvm/fpsimd.c
> +++ b/arch/arm64/kvm/fpsimd.c
> @@ -87,10 +87,11 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
>  
>   if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
> - fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs,
> -  NULL, SVE_VL_MIN);
> +  vcpu->arch.sve_state,
> +  vcpu->arch.sve_max_vl);
>  
>   clear_thread_flag(TIF_FOREIGN_FPSTATE);
> - clear_thread_flag(TIF_SVE);
> + update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));
>   }
>  }
>  
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 9f07403..cdc9063 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -98,7 +98,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu)
>   val = read_sysreg(cpacr_el1);
>   val |= CPACR_EL1_TTA;
>   val &= ~CPACR_EL1_ZEN;
> - if (!update_fp_enabled(vcpu)) {
> + if (update_fp_enabled(vcpu)) {
> + if (vcpu_has_sve(vcpu))
> + val |= CPACR_EL1_ZEN;
> + } else {
>   val &= ~CPACR_EL1_FPEN;
>   __activate_traps_fpsimd32(vcpu);
>   }
> @@ -313,16 +316,43 @@ static bool __hyp_text __populate_fault_info(struct 
> kvm_vcpu *vcpu)
>   return true;
>  }
>  
> -static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
> +/* Check for an FPSIMD/SVE trap and handle as appropriate */
> +static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
>  {
> - struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state;
> + bool vhe, sve_guest, sve_host;
> + u8 trap_class;
>  
> - if (has_

Re: [PATCH v5 12/26] KVM: arm64: Support runtime sysreg visibility filtering

2019-02-20 Thread Julien Thierry



On 18/02/2019 19:52, Dave Martin wrote:
> Some optional features of the Arm architecture add new system
> registers that are not present in the base architecture.
> 
> Where these features are optional for the guest, the visibility of
> these registers may need to depend on some runtime configuration,
> such as a flag passed to KVM_ARM_VCPU_INIT.
> 
> For example, ZCR_EL1 and ID_AA64ZFR0_EL1 need to be hidden if SVE
> is not enabled for the guest, even though these registers may be
> present in the hardware and visible to the host at EL2.
> 
> Adding special-case checks all over the place for individual
> registers is going to get messy as the number of conditionally-
> visible registers grows.
> 
> In order to help solve this problem, this patch adds a new sysreg
> method restrictions() that can be used to hook in any needed
> runtime visibility checks.  This method can currently return
> REG_NO_USER to inhibit enumeration and ioctl access to the register
> for userspace, and REG_NO_GUEST to inhibit runtime access by the
> guest using MSR/MRS.
> 
> This allows a conditionally modified view of individual system
> registers such as the CPU ID registers, in addition to completely
> hiding registers where appropriate.
> 
> Signed-off-by: Dave Martin 

Reviewed-by: Julien Thierry 
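For readers following the series: the intent is that individual register entries supply a restrictions() hook, and both the guest trap path and the userspace ioctl path consult it before doing anything else. A toy, self-contained model of that flow (not kernel code; ZCR_EL1 is only used as an example, its real wiring comes later in the series):

#include <stdio.h>

#define REG_NO_USER	(1 << 0)	/* hidden from KVM_GET_REG_LIST and ioctl access */
#define REG_NO_GUEST	(1 << 1)	/* guest MRS/MSR access raises UNDEF */

struct vcpu { int has_sve; };

struct reg_desc {
	const char *name;
	unsigned int (*restrictions)(const struct vcpu *vcpu);
};

static unsigned int zcr_el1_restrictions(const struct vcpu *vcpu)
{
	return vcpu->has_sve ? 0 : (REG_NO_USER | REG_NO_GUEST);
}

static void guest_access(const struct vcpu *vcpu, const struct reg_desc *r)
{
	unsigned int flags = r->restrictions ? r->restrictions(vcpu) : 0;

	if (flags & REG_NO_GUEST)
		printf("%s: inject UNDEF into the guest\n", r->name);
	else
		printf("%s: emulate the access\n", r->name);
}

int main(void)
{
	struct reg_desc zcr = { "ZCR_EL1", zcr_el1_restrictions };
	struct vcpu plain = { 0 }, sve_vcpu = { 1 };

	guest_access(&plain, &zcr);	/* SVE disabled: register hidden */
	guest_access(&sve_vcpu, &zcr);	/* SVE enabled: register visible */
	return 0;
}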

> 
> ---
> 
> Changes since v4:
> 
>  * Move from a boolean sysreg property that just suppresses register
>enumeration via KVM_GET_REG_LIST, to a multi-flag property that
>allows independent runtime control of MRS/MSR and user ioctl access.
> 
>This allows registers to be either hidden completely, or to have
>hybrid behaviours (such as the not-enumerated, RAZ, WAZ behaviour of
>"non-present" CPU ID regs).
> ---
>  arch/arm64/kvm/sys_regs.c | 24 +---
>  arch/arm64/kvm/sys_regs.h | 13 +
>  2 files changed, 34 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 71c5825..3f1243e 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -1863,6 +1863,12 @@ static void perform_access(struct kvm_vcpu *vcpu,
>  {
>   trace_kvm_sys_access(*vcpu_pc(vcpu), params, r);
>  
> + /* Check for regs disabled by runtime config */
> + if (restrictions(vcpu, r) & REG_NO_GUEST) {
> + kvm_inject_undefined(vcpu);
> + return;
> + }
> +
>   /*
>* Not having an accessor means that we have configured a trap
>* that we don't know how to handle. This certainly qualifies
> @@ -2370,6 +2376,10 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, 
> const struct kvm_one_reg *reg
>   if (!r)
>   return get_invariant_sys_reg(reg->id, uaddr);
>  
> + /* Check for regs disabled by runtime config */
> + if (restrictions(vcpu, r) & REG_NO_USER)
> + return -ENOENT;
> +
>   if (r->get_user)
>   return (r->get_user)(vcpu, r, reg, uaddr);
>  
> @@ -2391,6 +2401,10 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, 
> const struct kvm_one_reg *reg
>   if (!r)
>   return set_invariant_sys_reg(reg->id, uaddr);
>  
> + /* Check for regs disabled by runtime config */
> + if (restrictions(vcpu, r) & REG_NO_USER)
> + return -ENOENT;
> +
>   if (r->set_user)
>   return (r->set_user)(vcpu, r, reg, uaddr);
>  
> @@ -2447,7 +2461,8 @@ static bool copy_reg_to_user(const struct sys_reg_desc 
> *reg, u64 __user **uind)
>   return true;
>  }
>  
> -static int walk_one_sys_reg(const struct sys_reg_desc *rd,
> +static int walk_one_sys_reg(const struct kvm_vcpu *vcpu,
> + const struct sys_reg_desc *rd,
>   u64 __user **uind,
>   unsigned int *total)
>  {
> @@ -2458,6 +2473,9 @@ static int walk_one_sys_reg(const struct sys_reg_desc 
> *rd,
>   if (!(rd->reg || rd->get_user))
>   return 0;
>  
> + if (restrictions(vcpu, rd) & REG_NO_USER)
> + return 0;
> +
>   if (!copy_reg_to_user(rd, uind))
>   return -EFAULT;
>  
> @@ -2486,9 +2504,9 @@ static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 
> __user *uind)
>   int cmp = cmp_sys_reg(i1, i2);
>   /* target-specific overrides generic entry. */
>   if (cmp <= 0)
> - err = walk_one_sys_reg(i1, &uind, &total);
> + err = walk_one_sys_reg(vcpu, i1, &uind, &total);
>   else
> - err = walk_one_sys_reg(i2, &uind, &total);
> + err = walk_one_sys_reg(vcpu, i2, &uind, &total);

Re: [PATCH v5 11/26] KVM: arm64: Extend reset_unknown() to handle mixed RES0/UNKNOWN registers

2019-02-20 Thread Julien Thierry
Hi Dave,

On 18/02/2019 19:52, Dave Martin wrote:
> The reset_unknown() system register helper initialises a guest
> register to a distinctive junk value on vcpu reset, to help expose
> and debug deficient register initialisation within the guest.
> 
> Some registers such as the SVE control register ZCR_EL1 contain a
> mixture of UNKNOWN fields and RES0 bits.  For these,
> reset_unknown() does not work at present, since it sets all bits to
> junk values instead of just the wanted bits.
> 
> There is no need to craft another special helper just for that,
> since reset_unknown() almost does the appropriate thing anyway.
> This patch takes advantage of the unused val field in struct
> sys_reg_desc to specify a mask of bits that should be initialised
> to zero instead of junk.
> 
> All existing users of reset_unknown() do not (and should not)
> define a value for val, so they will implicitly set it to zero,
> resulting in all bits being made UNKNOWN by this function: thus,
> this patch makes no functional change for currently defined
> registers.
> 
> Future patches will make use of non-zero val.
> 
> Signed-off-by: Dave Martin 
> ---
>  arch/arm64/kvm/sys_regs.h | 11 +--
>  1 file changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
> index 3b1bc7f..174ffc0 100644
> --- a/arch/arm64/kvm/sys_regs.h
> +++ b/arch/arm64/kvm/sys_regs.h
> @@ -56,7 +56,12 @@ struct sys_reg_desc {
>   /* Index into sys_reg[], or 0 if we don't need to save it. */
>   int reg;
>  
> - /* Value (usually reset value) */
> + /*
> +  * Value (usually reset value)
> +  * For reset_unknown, each bit set to 1 in val is treated as
> +  * RES0 in the register: the corresponding register bit is
> +  * reset to 0 instead of "unknown".
> +  */

Since this field already has other users, overloading it this way feels
a bit fragile. Is there a reason not to add a separate "u64 res0_mask;"?

The sys_reg_desc structures are instantiated once as constants for the
whole system rather than per VM/VCPU, so would it really be that bad to
add another 64-bit field there?
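To make the comparison concrete, here is a toy user-space model of what the suggested layout could look like (not kernel code; the ZCR_EL1 mask below is only an example):

#include <stdio.h>
#include <stdint.h>

struct sys_reg_desc {
	const char *name;
	uint64_t val;		/* the patch reuses this as the RES0 mask   */
	uint64_t res0_mask;	/* suggested alternative: a dedicated field */
};

static uint64_t reset_unknown(const struct sys_reg_desc *r)
{
	/* Junk pattern with the RES0 bits forced to zero. */
	return 0x1de7ec7edbadc0deULL & ~r->res0_mask;
}

int main(void)
{
	/* e.g. ZCR_EL1: only LEN[3:0] is UNKNOWN, everything else is RES0. */
	struct sys_reg_desc zcr = { "ZCR_EL1", 0, ~0xfULL };

	printf("%s reset value: 0x%016llx\n", zcr.name,
	       (unsigned long long)reset_unknown(&zcr));
	return 0;
}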

>   u64 val;
>  
>   /* Custom get/set_user functions, fallback to generic if NULL */
> @@ -92,7 +97,9 @@ static inline void reset_unknown(struct kvm_vcpu *vcpu,
>  {
>   BUG_ON(!r->reg);
>   BUG_ON(r->reg >= NR_SYS_REGS);
> - __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
> +
> + /* If non-zero, r->val specifies which register bits are RES0: */
> + __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL & ~r->val;
>  }
>  
>  static inline void reset_val(struct kvm_vcpu *vcpu, const struct 
> sys_reg_desc *r)
> 

Cheers,

-- 
Julien Thierry


Re: [PATCH v5 07/26] arm64/sve: Clarify role of the VQ map maintenance functions

2019-02-20 Thread Julien Thierry



On 18/02/2019 19:52, Dave Martin wrote:
> The roles of sve_init_vq_map(), sve_update_vq_map() and
> sve_verify_vq_map() are highly non-obvious to anyone who has not dug
> through cpufeatures.c in detail.
> 
> Since the way these functions interact with each other is more
> important here than a full understanding of the cpufeatures code, this
> patch adds comments to make the functions' roles clearer.
> 
> No functional change.
> 
> Signed-off-by: Dave Martin 
> ---
>  arch/arm64/kernel/fpsimd.c | 10 +-
>  1 file changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 64729e2..92c2331 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -647,6 +647,10 @@ static void sve_probe_vqs(DECLARE_BITMAP(map, 
> SVE_VQ_MAX))
>   }
>  }
>  
> +/*
> + * Initialise the set of known supported VQs for the boot CPU.
> + * This is called during kernel boot, before secondary CPUs are brought up.
> + */
>  void __init sve_init_vq_map(void)
>  {
>   sve_probe_vqs(sve_vq_map);
> @@ -656,6 +660,7 @@ void __init sve_init_vq_map(void)
>  /*
>   * If we haven't committed to the set of supported VQs yet, filter out
>   * those not supported by the current CPU.
> + * This function is called during the bring-up of early secondary CPUs only.
>   */
>  void sve_update_vq_map(void)
>  {
> @@ -666,7 +671,10 @@ void sve_update_vq_map(void)
>   bitmap_or(sve_vq_partial_map, sve_vq_partial_map, tmp_map, SVE_VQ_MAX);
>  }
>  
> -/* Check whether the current CPU supports all VQs in the committed set */
> +/*
> + * Check whether the current CPU supports all VQs in the committed set.
> + * This function is called during the bring-up of late secondary CPUs only.

Oh I see, this is for late CPUs. So you can probably disregard my
comment on the warning in the previous patch.

If you respin this series, I feel it would be more useful to have this
patch before the current patch 6.

Reviewed-by: Julien Thierry 

-- 
Julien Thierry


Re: [PATCH v5 06/26] arm64/sve: Check SVE virtualisability

2019-02-20 Thread Julien Thierry
secondary_vq_map);
> - bitmap_andnot(sve_secondary_vq_map, sve_vq_map, sve_secondary_vq_map,
> -   SVE_VQ_MAX);
> - if (!bitmap_empty(sve_secondary_vq_map, SVE_VQ_MAX)) {
> + sve_probe_vqs(tmp_map);
> +
> + bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
> + if (bitmap_intersects(tmp_map, sve_vq_map, SVE_VQ_MAX)) {
>   pr_warn("SVE: cpu%d: Required vector length(s) missing\n",
>   smp_processor_id());
> - ret = -EINVAL;
> + return -EINVAL;
>   }
>  
> - return ret;
> + if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
> + return 0;
> +
> + /*
> +  * For KVM, it is necessary to ensure that this CPU doesn't
> +  * support any vector length that guests may have probed as
> +  * unsupported.
> +  */
> +
> + /* Recover the set of supported VQs: */
> + bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
> + /* Find VQs supported that are not globally supported: */
> + bitmap_andnot(tmp_map, tmp_map, sve_vq_map, SVE_VQ_MAX);
> +
> + /* Find the lowest such VQ, if any: */
> + b = find_last_bit(tmp_map, SVE_VQ_MAX);
> + if (b >= SVE_VQ_MAX)
> + return 0; /* no mismatches */
> +
> + /*
> +  * Mismatches above sve_max_virtualisable_vl are fine, since
> +  * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
> +  */
> +     if (sve_vl_from_vq(bit_to_vq(b)) <= sve_max_virtualisable_vl) {
> + pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",

Nit: it might be good to specify that the vector length is unsupported
for virtualisation specifically.

Also, since KVM is the one deciding what to do with this information,
do we really want a warning here? I can see the value of knowing which
CPUs introduce unsupported vector lengths, but maybe pr_devel() would
be more appropriate than pr_warn().


In any case, the logic looks good to me:

Reviewed-by: Julien Thierry 
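As a quick illustration of the bitmap arithmetic above, here is a much-simplified user-space toy (plain masks instead of the kernel bitmap API, and it ignores sve_max_virtualisable_vl):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Bit n set means vector quadword count n + 1 is supported. */
	uint16_t committed = 0x0003;	/* system-wide set: VQ 1-2 (VL 128, 256) */
	uint16_t late_cpu  = 0x0007;	/* late CPU also supports VQ 3 (VL 384)  */

	/* 1) Every committed VQ must still be supported by the late CPU. */
	if (~late_cpu & committed) {
		printf("required vector length(s) missing\n");
		return 1;
	}

	/* 2) For KVM: VQs this CPU supports that the system does not. */
	uint16_t extra = late_cpu & (uint16_t)~committed;

	if (extra)
		printf("vector length(s) not virtualisable: mask 0x%04x\n", extra);

	return 0;
}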

Cheers,

-- 
Julien Thierry


Re: [PATCH] KVM: arm/arm64: arch_timer: Mark physical interrupt active when a virtual interrupt is pending

2019-02-12 Thread Julien Thierry
Hi Marc,

On 08/02/2019 14:43, Marc Zyngier wrote:
> When a guest gets scheduled, KVM performs a "load" operation,
> which for the timer includes evaluating the virtual "active" state
> of the interrupt, and replicating it on the physical side. This
> ensures that the deactivation in the guest will also take place
> in the physical GIC distributor.
> 
> If the interrupt is not yet active, we flag it as inactive on the
> physical side.  This means that on restoring the timer registers,
> if the timer has expired, we'll immediately take an interrupt.
> That's absolutely fine, as the interrupt will then be flagged as
> active on the physical side. What this assumes though is that we'll
> enter the guest right after having taken the interrupt, and that
> the guest will quickly ACK the interrupt, making it active at on

Nit: "at on" -> pick one

> the virtual side.
> 
> It turns out that quite often, this assumption doesn't really hold.
> The guest may be preempted on the back on this interrupt, either

on the back of*

> from kernel space or whilst running at EL1 when a host interrupt
> fires. When this happens, we repeat the whole sequence on the
> next load (interrupt marked as inactive, timer registers restored,
> interrupt fires). And if it takes a really long time for a guest
> to activate the interrupt (as it does with nested virt), we end-up
> with many such events in quick succession, leading to the guest only
> making very slow progress.
> 
> This can also be seen with the number of virtual timer interrupt on the
> host being far greater than the same number in the guest.
> 
> An easy way to fix this is to evaluate the timer state when performing
> the "load" operation, just like we do when the interrupt actually fires.
> If the timer has a pending virtual interrupt at this stage, then we
> can safely flag the physical interrupt as being active, which prevents
> spurious exits.
> 
> Signed-off-by: Marc Zyngier 

Otherwise, I think the change makes sense:

Reviewed-by: Julien Thierry 

Cheers,

> ---
>  virt/kvm/arm/arch_timer.c | 15 ---
>  1 file changed, 12 insertions(+), 3 deletions(-)
> 
> diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
> index 7449651ae2e5..70c18479ccd5 100644
> --- a/virt/kvm/arm/arch_timer.c
> +++ b/virt/kvm/arm/arch_timer.c
> @@ -487,12 +487,21 @@ static inline void set_timer_irq_phys_active(struct 
> arch_timer_context *ctx, boo
>  static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
>  {
>   struct kvm_vcpu *vcpu = ctx->vcpu;
> - bool phys_active;
> + bool phys_active = false;
> +
> + /*
> +  * Update the timer output so that it is likely to match the
> +  * state we're about to restore. If the timer expires between
> +  * this point and the register restoration, we'll take the
> +  * interrupt anyway.
> +  */
> + kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);
>  
>   if (irqchip_in_kernel(vcpu->kvm))
>   phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq);
> - else
> - phys_active = ctx->irq.level;
> +
> + phys_active |= ctx->irq.level;
> +
>   set_timer_irq_phys_active(ctx, phys_active);
>  }
>  
> 

-- 
Julien Thierry

