Re: [PATCH v3 01/41] KVM: arm/arm64: Avoid vcpu_load for other vcpu ioctls than KVM_RUN

2018-02-05 Thread Julien Grall

Hi Christoffer,

On 12/01/18 12:07, Christoffer Dall wrote:

Calling vcpu_load() registers preempt notifiers for this vcpu and calls
kvm_arch_vcpu_load().  The latter will soon be doing a lot of heavy
lifting on arm/arm64 and will try to do things such as enabling the
virtual timer and setting us up to handle interrupts from the timer
hardware.

Loading state onto hardware registers and enabling hardware to signal
interrupts can be problematic when we're not actually about to run the
VCPU, because it makes it difficult to establish the right context when
handling interrupts from the timer, and it makes the register access
code difficult to reason about.

Luckily, now when we call vcpu_load in each ioctl implementation, we can
simply remove the call from the non-KVM_RUN vcpu ioctls, and our
kvm_arch_vcpu_load() is only used for loading vcpu content to the
physical CPU when we're actually going to run the vcpu.
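
In rough C terms, the pattern described above looks like the sketch below. This is purely illustrative (the example_* names are made up and this is not the actual patch): non-KVM_RUN ioctls operate on the in-memory vcpu state only, and only the KVM_RUN path loads state onto the physical CPU.

   /* Illustrative sketch only; the example_* names are not real kernel functions. */
   static int example_get_regs(struct kvm_vcpu *vcpu,
                               struct kvm_regs __user *uregs)
   {
           /*
            * Operates purely on the in-memory vcpu context: no vcpu_load(),
            * so no kvm_arch_vcpu_load() side effects such as enabling the
            * virtual timer.
            */
           return example_copy_regs_to_user(vcpu, uregs);
   }

   static int example_run(struct kvm_vcpu *vcpu)
   {
           int ret;

           /* Only the KVM_RUN path still loads vcpu state onto the CPU. */
           vcpu_load(vcpu);
           ret = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
           vcpu_put(vcpu);
           return ret;
   }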

Signed-off-by: Christoffer Dall 


Reviewed-by: Julien Grall 

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 02/41] KVM: arm/arm64: Move vcpu_load call after kvm_vcpu_first_run_init

2018-02-05 Thread Julien Grall

Hi Christoffer,

On 12/01/18 12:07, Christoffer Dall wrote:

Moving the call to vcpu_load() in kvm_arch_vcpu_ioctl_run() to after
we've called kvm_vcpu_first_run_init() simplifies some of the vgic and
timer code, and there is also no need to do vcpu_load() for things such
as handling the immediate_exit flag.

Signed-off-by: Christoffer Dall 


Reviewed-by: Julien Grall 

Cheers,


---
  virt/kvm/arm/arch_timer.c |  7 ---
  virt/kvm/arm/arm.c| 22 --
  virt/kvm/arm/vgic/vgic-init.c | 11 ---
  3 files changed, 8 insertions(+), 32 deletions(-)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index cfcd0323deab..c09c701fd68e 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -834,14 +834,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
return ret;
  
  no_vgic:

-   preempt_disable();
timer->enabled = 1;
-   if (!irqchip_in_kernel(vcpu->kvm))
-   kvm_timer_vcpu_load_user(vcpu);
-   else
-   kvm_timer_vcpu_load_vgic(vcpu);
-   preempt_enable();
-
return 0;
  }
  
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c

index 5e3c149a6e28..360df72692ee 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -631,27 +631,22 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (unlikely(!kvm_vcpu_initialized(vcpu)))
return -ENOEXEC;
  
-	vcpu_load(vcpu);

-
ret = kvm_vcpu_first_run_init(vcpu);
if (ret)
-   goto out;
+   return ret;
  
  	if (run->exit_reason == KVM_EXIT_MMIO) {

ret = kvm_handle_mmio_return(vcpu, vcpu->run);
if (ret)
-   goto out;
-   if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) {
-   ret = 0;
-   goto out;
-   }
-
+   return ret;
+   if (kvm_arm_handle_step_debug(vcpu, vcpu->run))
+   return 0;
}
  
-	if (run->immediate_exit) {

-   ret = -EINTR;
-   goto out;
-   }
+   if (run->immediate_exit)
+   return -EINTR;
+
+   vcpu_load(vcpu);
  
  	kvm_sigset_activate(vcpu);
  
@@ -803,7 +798,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
  
  	kvm_sigset_deactivate(vcpu);
  
-out:

vcpu_put(vcpu);
return ret;
  }
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 62310122ee78..a0688ef52ad7 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -300,17 +300,6 @@ int vgic_init(struct kvm *kvm)
  
  	dist->initialized = true;
  
-	/*

-* If we're initializing GICv2 on-demand when first running the VCPU
-* then we need to load the VGIC state onto the CPU.  We can detect
-* this easily by checking if we are in between vcpu_load and vcpu_put
-* when we just initialized the VGIC.
-*/
-   preempt_disable();
-   vcpu = kvm_arm_get_running_vcpu();
-   if (vcpu)
-   kvm_vgic_load(vcpu);
-   preempt_enable();
  out:
    return ret;
  }



--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 03/41] KVM: arm64: Avoid storing the vcpu pointer on the stack

2018-02-05 Thread Julien Grall

Hi Christoffer,

On 12/01/18 12:07, Christoffer Dall wrote:

diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 048f5db120f3..6ce0b428a4db 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -350,10 +350,15 @@ int kvm_perf_teardown(void);
  
  struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
  
+extern void __kvm_set_tpidr_el2(u64 tpidr_el2);


NIT: The rest of the file seems to declare prototypes without extern.

[...]


diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 71bf088f1e4b..612021dce84f 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -135,6 +135,7 @@ int main(void)
DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
DEFINE(VCPU_FPEXC32_EL2,offsetof(struct kvm_vcpu, 
arch.ctxt.sys_regs[FPEXC32_EL2]));
DEFINE(VCPU_HOST_CONTEXT,   offsetof(struct kvm_vcpu, 
arch.host_cpu_context));
+  DEFINE(HOST_CONTEXT_VCPU,offsetof(struct kvm_cpu_context, 
__hyp_running_vcpu));
  #endif
  #ifdef CONFIG_CPU_PM
DEFINE(CPU_SUSPEND_SZ,  sizeof(struct cpu_suspend_ctx));
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 9a8ab59e..a360ac6e89e9 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -62,9 +62,6 @@ ENTRY(__guest_enter)
// Store the host regs
save_callee_saved_regs x1
  
-	// Store host_ctxt and vcpu for use at exit time

-   stp x1, x0, [sp, #-16]!
-
add x18, x0, #VCPU_CONTEXT
  
  	// Restore guest regs x0-x17

@@ -118,8 +115,7 @@ ENTRY(__guest_exit)
// Store the guest regs x19-x29, lr
save_callee_saved_regs x1
  
-	// Restore the host_ctxt from the stack

-   ldr x2, [sp], #16
+   get_host_ctxt   x2, x3
  
  	// Now restore the host regs

restore_callee_saved_regs x2
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index e4f37b9dd47c..71b4cc92895e 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -56,18 +56,15 @@ ENDPROC(__vhe_hyp_call)
  el1_sync: // Guest trapped into EL2
stp x0, x1, [sp, #-16]!
  
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN

-   mrs x1, esr_el2
-alternative_else
-   mrs x1, esr_el1
-alternative_endif
-   lsr x0, x1, #ESR_ELx_EC_SHIFT
+   mrs x1, vttbr_el2   // If vttbr is valid, this is a trap
+   cbnzx1, el1_trap// from the guest
  
-	cmp	x0, #ESR_ELx_EC_HVC64

-   b.neel1_trap
-
-   mrs x1, vttbr_el2   // If vttbr is valid, the 64bit guest
-   cbnzx1, el1_trap// called HVC
+#ifdef CONFIG_DEBUG
+   mrs x0, esr_el2
+   lsr x0, x0, #ESR_ELx_EC_SHIFT
+   cmp x0, #ESR_ELx_EC_HVC64
+   b.ne__hyp_panic
+#endif


FWIW, I noticed that Mark's series about Spectre is also touching this 
code (see https://patchwork.kernel.org/patch/10190297/).


Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 04/41] KVM: arm64: Rework hyp_panic for VHE and non-VHE

2018-02-05 Thread Julien Grall

Hi Christoffer,

On 12/01/18 12:07, Christoffer Dall wrote:

VHE actually doesn't rely on clearing the VTTBR when returning to the
host kernel, and that is the current key mechanism of hyp_panic to
figure out how to attempt to return to a state good enough to print a
panic statement.

Therefore, we split the hyp_panic function into two functions, a VHE and
a non-VHE, keeping the non-VHE version intact, but changing the VHE
behavior.

The vttbr_el2 check on VHE doesn't really make that much sense, because
the only situation where we can get here on VHE is when the hypervisor
assembly code actually called into hyp_panic, which only happens when
VBAR_EL2 has been set to the KVM exception vectors.  On VHE, we can
always safely disable the traps and restore the host registers at this
point, so we simply do that unconditionally and call into the panic
function directly.

Acked-by: Marc Zyngier 
Signed-off-by: Christoffer Dall 
---
  arch/arm64/kvm/hyp/switch.c | 42 +++---
  1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 6fcb37e220b5..71700ecee308 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -419,10 +419,20 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
  static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
  
  static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,

-struct kvm_vcpu *vcpu)
+struct kvm_cpu_context 
*__host_ctxt)
  {
+   struct kvm_vcpu *vcpu;
unsigned long str_va;
  
+	vcpu = __host_ctxt->__hyp_running_vcpu;

+
+   if (read_sysreg(vttbr_el2)) {
+   __timer_disable_traps(vcpu);
+   __deactivate_traps(vcpu);
+   __deactivate_vm(vcpu);
+   __sysreg_restore_host_state(__host_ctxt);
+   }
+
/*
 * Force the panic string to be loaded from the literal pool,
 * making sure it is a kernel address and not a PC-relative
@@ -436,37 +446,31 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, 
u64 elr, u64 par,
   read_sysreg(hpfar_el2), par, vcpu);
  }
  
-static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,

-   struct kvm_vcpu *vcpu)
+static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
+struct kvm_cpu_context *host_ctxt)
  {
+   struct kvm_vcpu *vcpu;
+   vcpu = host_ctxt->__hyp_running_vcpu;
+
+   __deactivate_traps(vcpu);
+   __sysreg_restore_host_state(host_ctxt);


I was about to ask why you keep this function around as it does nothing 
in the VHE case. But I see that this will actually restore some values 
in a later patch.



+
panic(__hyp_panic_string,
  spsr,  elr,
  read_sysreg_el2(esr),   read_sysreg_el2(far),
  read_sysreg(hpfar_el2), par, vcpu);
  }
  
-static hyp_alternate_select(__hyp_call_panic,

-   __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
-   ARM64_HAS_VIRT_HOST_EXTN);


Out of interest, any specific reason to remove hyp_alternate_select and 
"open-code" it?



-
  void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
  {
-   struct kvm_vcpu *vcpu = NULL;
-
u64 spsr = read_sysreg_el2(spsr);
u64 elr = read_sysreg_el2(elr);
u64 par = read_sysreg(par_el1);
  
-	if (read_sysreg(vttbr_el2)) {

-   vcpu = host_ctxt->__hyp_running_vcpu;
-   __timer_disable_traps(vcpu);
-   __deactivate_traps(vcpu);
-   __deactivate_vm(vcpu);
-   __sysreg_restore_host_state(host_ctxt);
-   }
-
-   /* Call panic for real */
-   __hyp_call_panic()(spsr, elr, par, vcpu);
+   if (!has_vhe())
+   __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt);
+   else
+   __hyp_call_panic_vhe(spsr, elr, par, host_ctxt);
  
  	unreachable();

  }



Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 04/41] KVM: arm64: Rework hyp_panic for VHE and non-VHE

2018-02-05 Thread Julien Grall



On 05/02/18 18:04, Julien Grall wrote:

On 12/01/18 12:07, Christoffer Dall wrote:
@@ -436,37 +446,31 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 
spsr, u64 elr, u64 par,

 read_sysreg(hpfar_el2), par, vcpu);
  }
-static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
-    struct kvm_vcpu *vcpu)
+static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
+ struct kvm_cpu_context *host_ctxt)
  {
+    struct kvm_vcpu *vcpu;
+    vcpu = host_ctxt->__hyp_running_vcpu;
+
+    __deactivate_traps(vcpu);
+    __sysreg_restore_host_state(host_ctxt);


I was about to ask why you keep this function around as it does nothing 
in the VHE case. But I see that this will actually restore some values 
in a later patch.


Actually, I just misread the code. Sorry for the noise.

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 04/41] KVM: arm64: Rework hyp_panic for VHE and non-VHE

2018-02-09 Thread Julien Grall

Hi Christoffer,

On 02/08/2018 01:24 PM, Christoffer Dall wrote:

On Mon, Feb 05, 2018 at 06:04:25PM +, Julien Grall wrote:

On 12/01/18 12:07, Christoffer Dall wrote:

+
panic(__hyp_panic_string,
  spsr,  elr,
  read_sysreg_el2(esr),   read_sysreg_el2(far),
  read_sysreg(hpfar_el2), par, vcpu);
  }
-static hyp_alternate_select(__hyp_call_panic,
-   __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
-   ARM64_HAS_VIRT_HOST_EXTN);


Out of interest, any specific reason to remove hyp_alternate_select and
"open-code" it?



Not sure I understand your question.

Are you asking why I replace the hyp alternatives with the has_vhe()?
If so, has_vhe() uses a static key and should therefore have the same
performance characteristics, but I find the has_vhe() version below much
more readable.


That's what I was asking. Thank you for the explanation.
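
For reference, has_vhe() is backed by the ARM64_HAS_VIRT_HOST_EXTN capability, which is implemented with a static key once the CPU capabilities are finalized, so the check is a patched branch rather than a runtime load. A simplified sketch of the shape of the helper (see arch/arm64/include/asm/virt.h for the real definition):

   /* Simplified sketch, not the exact kernel definition. */
   static inline bool has_vhe(void)
   {
           /* cpus_have_const_cap() is a static-key based test after boot. */
           if (cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN))
                   return true;

           return false;
   }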

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 05/41] KVM: arm64: Move HCR_INT_OVERRIDE to default HCR_EL2 guest flag

2018-02-09 Thread Julien Grall

Hi,

On 01/12/2018 12:07 PM, Christoffer Dall wrote:

From: Shih-Wei Li 

We always set the IMO and FMO bits in the HCR_EL2 when running the
guest, regardless if we use the vgic or not.  By moving these flags to
HCR_GUEST_FLAGS we can avoid one of the extra save/restore operations of
HCR_EL2 in the world switch code, and we can also soon get rid of the
other one.

This is safe, because even though the IMO and FMO bits control both
taking the interrupts to EL2 and remapping ICC_*_EL1 to ICV_*_EL1
executed at EL1, as long as we ensure that these bits are clear when
running the EL1 host, as defined in the HCR_HOST_[VHE_]FLAGS, we're OK.


NIT: I was a bit confused by the end of the sentence because the 
HCR_HOST_FLAGS define does not seem to exist.




Reviewed-by: Marc Zyngier 
Signed-off-by: Shih-Wei Li 
Signed-off-by: Christoffer Dall 
---
  arch/arm64/include/asm/kvm_arm.h | 4 ++--
  arch/arm64/kvm/hyp/switch.c  | 3 ---
  2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 715d395ef45b..656deeb17bf2 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -79,9 +79,9 @@
   */
  #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
 HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
-HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW)
+HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | \
+HCR_FMO | HCR_IMO)
  #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
-#define HCR_INT_OVERRIDE   (HCR_FMO | HCR_IMO)
  #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
  
  /* TCR_EL2 Registers bits */

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 71700ecee308..f6189d08753e 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -167,8 +167,6 @@ static void __hyp_text __vgic_save_state(struct kvm_vcpu 
*vcpu)
__vgic_v3_save_state(vcpu);
else
__vgic_v2_save_state(vcpu);
-
-   write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2);
  }
  
  static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)

@@ -176,7 +174,6 @@ static void __hyp_text __vgic_restore_state(struct kvm_vcpu 
*vcpu)
u64 val;
  
  	val = read_sysreg(hcr_el2);

-   val |=  HCR_INT_OVERRIDE;
val |= vcpu->arch.irq_lines;
write_sysreg(val, hcr_el2);
  



Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 08/41] KVM: arm/arm64: Introduce vcpu_el1_is_32bit

2018-02-09 Thread Julien Grall

Hi Christoffer,

On 01/12/2018 12:07 PM, Christoffer Dall wrote:

We have numerous checks around that check if the HCR_EL2 has the RW bit
set to figure out if we're running an AArch64 or AArch32 VM.  In some
cases, directly checking the RW bit (given its unintuitive name) is a
bit confusing, and that's not going to improve as we move logic around
for the following patches that optimize KVM on AArch64 hosts with VHE.

Therefore, introduce a helper, vcpu_el1_is_32bit, and replace existing
direct checks of HCR_EL2.RW with the helper.
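
The helper itself is small; based on the description above it plausibly looks like the sketch below (the actual patch may differ in detail):

   /* Sketch based on the commit message, not necessarily the exact patch. */
   static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
   {
           /* HCR_EL2.RW == 0 means the guest's EL1 runs in AArch32. */
           return !(vcpu->arch.hcr_el2 & HCR_RW);
   }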

Signed-off-by: Christoffer Dall 


Reviewed-by: Julien Grall 

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 09/41] KVM: arm64: Defer restoring host VFP state to vcpu_put

2018-02-09 Thread Julien Grall

Hi Christoffer,

On 01/12/2018 12:07 PM, Christoffer Dall wrote:

Avoid saving the guest VFP registers and restoring the host VFP
registers on every exit from the VM.  Only when we're about to run
userspace or other threads in the kernel do we really have to switch the


s/do// ?


state back to the host state.

We still initially configure the VFP registers to trap when entering the
VM, but the difference is that we now leave the guest state in the
hardware registers as long as we're running this VCPU, even if we
occasionally trap to the host, and we only restore the host state when
we return to user space or when scheduling another thread.
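
In rough terms, the vcpu_put-time path described above amounts to the sketch below, using the guest_vfp_loaded flag introduced by the patch; the function name is made up and the exact call site is illustrative:

   /* Illustrative sketch; example_vcpu_put_fp() is a made-up name. */
   static void example_vcpu_put_fp(struct kvm_vcpu *vcpu)
   {
           struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
           struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;

           if (!vcpu->arch.guest_vfp_loaded)
                   return;

           /* The guest state is still live in the FP/SIMD registers. */
           __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
           __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
           vcpu->arch.guest_vfp_loaded = 0;
   }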

Reviewed-by: Andrew Jones 
Reviewed-by: Marc Zyngier 
Signed-off-by: Christoffer Dall 
---
  arch/arm64/include/asm/kvm_host.h |  3 +++
  arch/arm64/kernel/asm-offsets.c   |  1 +
  arch/arm64/kvm/hyp/entry.S|  3 +++
  arch/arm64/kvm/hyp/switch.c   | 48 ---
  arch/arm64/kvm/hyp/sysreg-sr.c| 21 ++---
  5 files changed, 40 insertions(+), 36 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 0e9e7291a7e6..9e23bc968668 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -213,6 +213,9 @@ struct kvm_vcpu_arch {
/* Guest debug state */
u64 debug_flags;
  
+	/* 1 if the guest VFP state is loaded into the hardware */

+   u8 guest_vfp_loaded;
+
/*
 * We maintain more than a single set of debug registers to support
 * debugging the guest from the host and to maintain separate host and
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 612021dce84f..99467327c043 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -133,6 +133,7 @@ int main(void)
DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
DEFINE(CPU_USER_PT_REGS,offsetof(struct kvm_regs, regs));
DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
+  DEFINE(VCPU_GUEST_VFP_LOADED,offsetof(struct kvm_vcpu, 
arch.guest_vfp_loaded));
DEFINE(VCPU_FPEXC32_EL2,offsetof(struct kvm_vcpu, 
arch.ctxt.sys_regs[FPEXC32_EL2]));
DEFINE(VCPU_HOST_CONTEXT,   offsetof(struct kvm_vcpu, 
arch.host_cpu_context));
DEFINE(HOST_CONTEXT_VCPU,   offsetof(struct kvm_cpu_context, 
__hyp_running_vcpu));
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index a360ac6e89e9..53652287a236 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -184,6 +184,9 @@ alternative_endif
add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
bl  __fpsimd_restore_state
  
+	mov	x0, #1

+   strbw0, [x3, #VCPU_GUEST_VFP_LOADED]
+
// Skip restoring fpexc32 for AArch64 guests
mrs x1, hcr_el2
tbnzx1, #HCR_RW_SHIFT, 1f
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 12dc647a6e5f..29e44a20f5e3 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -24,43 +24,32 @@
  #include 
  #include 
  
-static bool __hyp_text __fpsimd_enabled_nvhe(void)

-{
-   return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
-}
-
-static bool __hyp_text __fpsimd_enabled_vhe(void)
-{
-   return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN);
-}
-
-static hyp_alternate_select(__fpsimd_is_enabled,
-   __fpsimd_enabled_nvhe, __fpsimd_enabled_vhe,
-   ARM64_HAS_VIRT_HOST_EXTN);
-
-bool __hyp_text __fpsimd_enabled(void)


Now that __fpsimd_enabled is removed, I think you need to remove the 
prototype in arch/arm64/include/asm/kvm_hyp.h too.



-{
-   return __fpsimd_is_enabled()();
-}


Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 14/41] KVM: arm64: Introduce VHE-specific kvm_vcpu_run

2018-02-09 Thread Julien Grall
p(__kvm_vcpu_run, vcpu);

+   if (has_vhe())
+   ret = kvm_vcpu_run_vhe(vcpu);
+   else
+   ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu);
  
  		vcpu->mode = OUTSIDE_GUEST_MODE;

vcpu->stat.exits++;
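
The hunk above is the core of the change: a VHE host already runs at EL2, so the switch code can be reached with an ordinary function call, while the non-VHE path still has to enter EL2 via kvm_call_hyp(). The same dispatch with the rationale spelled out in comments (a sketch, assuming the function names shown in the hunk):

           if (has_vhe())
                   /* VHE: the host is at EL2, call the switch code directly. */
                   ret = kvm_vcpu_run_vhe(vcpu);
           else
                   /* non-VHE: issue an HVC into the EL2 world-switch code. */
                   ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu);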



Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 17/41] KVM: arm64: Remove noop calls to timer save/restore from VHE switch

2018-02-09 Thread Julien Grall

Hi Christoffer,

On 01/12/2018 12:07 PM, Christoffer Dall wrote:

The VHE switch function calls __timer_enable_traps and
__timer_disable_traps which don't do anything on VHE systems.
Therefore, simply remove these calls from the VHE switch function and
make the functions non-conditional as they are now only called from the
non-VHE switch path.

Acked-by: Marc Zyngier 
Signed-off-by: Christoffer Dall 
---
  arch/arm64/kvm/hyp/switch.c |  2 --
  virt/kvm/arm/hyp/timer-sr.c | 44 ++--
  2 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 9aadef6966bf..6175fcb33ed2 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -354,7 +354,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
__activate_vm(vcpu->kvm);
  
  	__vgic_restore_state(vcpu);

-   __timer_enable_traps(vcpu);
  
  	/*

 * We must restore the 32-bit state before the sysregs, thanks
@@ -373,7 +372,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
  
  	__sysreg_save_guest_state(guest_ctxt);

__sysreg32_save_state(vcpu);
-   __timer_disable_traps(vcpu);
__vgic_save_state(vcpu);
  
  	__deactivate_traps(vcpu);

diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c
index f24404b3c8df..77754a62eb0c 100644
--- a/virt/kvm/arm/hyp/timer-sr.c
+++ b/virt/kvm/arm/hyp/timer-sr.c
@@ -27,34 +27,34 @@ void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, 
u32 cntvoff_high)
write_sysreg(cntvoff, cntvoff_el2);
  }
  
+/*

+ * Should only be called on non-VHE systems.
+ * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
+ */
  void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu)


Would it be worth suffixing the function with nvhe, so it would be clear 
that it should not be called on a VHE system?



  {
-   /*
-* We don't need to do this for VHE since the host kernel runs in EL2
-* with HCR_EL2.TGE ==1, which makes those bits have no impact.
-*/
-   if (!has_vhe()) {
-   u64 val;
+   u64 val;
  
-		/* Allow physical timer/counter access for the host */

-   val = read_sysreg(cnthctl_el2);
-   val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
-   write_sysreg(val, cnthctl_el2);
-   }
+   /* Allow physical timer/counter access for the host */
+   val = read_sysreg(cnthctl_el2);
+   val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
+   write_sysreg(val, cnthctl_el2);
  }
  
+/*

+ * Should only be called on non-VHE systems.
+ * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
+ */
  void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu)


Same here.


  {
-   if (!has_vhe()) {
-   u64 val;
+   u64 val;
  
-		/*

-* Disallow physical timer access for the guest
-* Physical counter access is allowed
-*/
-   val = read_sysreg(cnthctl_el2);
-   val &= ~CNTHCTL_EL1PCEN;
-   val |= CNTHCTL_EL1PCTEN;
-   write_sysreg(val, cnthctl_el2);
-   }
+   /*
+* Disallow physical timer access for the guest
+* Physical counter access is allowed
+*/
+   val = read_sysreg(cnthctl_el2);
+   val &= ~CNTHCTL_EL1PCEN;
+   val |= CNTHCTL_EL1PCTEN;
+   write_sysreg(val, cnthctl_el2);
  }



Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 18/41] KVM: arm64: Move userspace system registers into separate function

2018-02-09 Thread Julien Grall

Hi Christoffer,

On 01/12/2018 12:07 PM, Christoffer Dall wrote:

There's a semantic difference between the EL1 registers that control
operation of a kernel running in EL1 and EL1 registers that only control
userspace execution in EL0.  Since we can defer saving/restoring the
latter, move them into their own function.

We also take this chance to rename the function saving/restoring the
remaining system register to make it clear this function deals with
the EL1 system registers.

No functional change.

Reviewed-by: Andrew Jones 
Signed-off-by: Christoffer Dall 
---
  arch/arm64/kvm/hyp/sysreg-sr.c | 46 +++---
  1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 848a46eb33bf..99dd50ce483b 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -34,18 +34,27 @@ static void __hyp_text __sysreg_do_nothing(struct 
kvm_cpu_context *ctxt) { }
  
  static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)

  {
-   ctxt->sys_regs[ACTLR_EL1]= read_sysreg(actlr_el1);


I am a bit confused: the comment on top of the function says the host 
must save ACTLR_EL1 in the VHE case. But AFAICT, after this patch the 
register will not get saved in the host context. Did I miss anything?


Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 03/40] KVM: arm64: Avoid storing the vcpu pointer on the stack

2018-02-19 Thread Julien Grall

Hi Christoffer,

On 15/02/18 21:02, Christoffer Dall wrote:

We already have the percpu area for the host cpu state, which points to
the VCPU, so there's no need to store the VCPU pointer on the stack on
every context switch.  We can be a little more clever and just use
tpidr_el2 for the percpu offset and load the VCPU pointer from the host
context.

This does require us to calculate the percpu offset without including
the offset from the kernel mapping of the percpu array to the linear
mapping of the array (which is what we store in tpidr_el1), because a
PC-relative generated address in EL2 is already giving us the hyp alias
of the linear mapping of a kernel address.  We do this in
__cpu_init_hyp_mode() by using kvm_ksym_ref().
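
Concretely, the host-side setup described above boils down to something like the sketch below (reconstructed from the description; see the actual patch for the exact code):

           u64 tpidr_el2;

           /*
            * Raw per-cpu offset of kvm_host_cpu_state, without the
            * kernel-to-linear-map delta that tpidr_el1 contains, so that a
            * PC-relative adr_l in hyp plus tpidr_el2 lands on the hyp alias
            * of the per-cpu data.
            */
           tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state) -
                       (u64)kvm_ksym_ref(kvm_host_cpu_state);

           kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);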

This change also requires us to have a scratch register, so we take the
chance to rearrange some of the el1_sync code to only look at the
vttbr_el2 to determine if this is a trap from the guest or an HVC from
the host.  We do add an extra check to call the panic code if the kernel
is configured with debugging enabled and we saw a trap from the host
which wasn't an HVC, indicating that we left some EL2 trap configured by
mistake.


You might want to remove this paragraph as you don't seem to have 
reworked that part of the code in this version.


Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 17/41] KVM: arm64: Remove noop calls to timer save/restore from VHE switch

2018-02-19 Thread Julien Grall

Hi Christoffer,

Sorry for the late reply.

On 13/02/18 22:31, Christoffer Dall wrote:

On Fri, Feb 09, 2018 at 05:53:43PM +, Julien Grall wrote:

Hi Christoffer,

On 01/12/2018 12:07 PM, Christoffer Dall wrote:

The VHE switch function calls __timer_enable_traps and
__timer_disable_traps which don't do anything on VHE systems.
Therefore, simply remove these calls from the VHE switch function and
make the functions non-conditional as they are now only called from the
non-VHE switch path.

Acked-by: Marc Zyngier 
Signed-off-by: Christoffer Dall 
---
  arch/arm64/kvm/hyp/switch.c |  2 --
  virt/kvm/arm/hyp/timer-sr.c | 44 ++--
  2 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 9aadef6966bf..6175fcb33ed2 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -354,7 +354,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
__activate_vm(vcpu->kvm);
__vgic_restore_state(vcpu);
-   __timer_enable_traps(vcpu);
/*
 * We must restore the 32-bit state before the sysregs, thanks
@@ -373,7 +372,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
__sysreg_save_guest_state(guest_ctxt);
__sysreg32_save_state(vcpu);
-   __timer_disable_traps(vcpu);
__vgic_save_state(vcpu);
__deactivate_traps(vcpu);
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c
index f24404b3c8df..77754a62eb0c 100644
--- a/virt/kvm/arm/hyp/timer-sr.c
+++ b/virt/kvm/arm/hyp/timer-sr.c
@@ -27,34 +27,34 @@ void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, 
u32 cntvoff_high)
write_sysreg(cntvoff, cntvoff_el2);
  }
+/*
+ * Should only be called on non-VHE systems.
+ * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
+ */
  void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu)


Would it be worth suffixing the function with nvhe, so it would be clear
that it should not be called on a VHE system?


Actually, I decided against this, because it's also called from the
32-bit code and it looks a little strange there, and it's not like we
have an equivalent _vhe version.


The main goal was to provide a name that would prevent someone from 
using it in the VHE case. This would also have been in line with other 
patches where you rename some helpers to nvhe/vhe even in arm32 code.


Anyway, I guess the reviewers will be careful enough to spot that :).

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 17/40] KVM: arm64: Move userspace system registers into separate function

2018-02-19 Thread Julien Grall

Hi Christoffer,

On 15/02/18 21:03, Christoffer Dall wrote:

There's a semantic difference between the EL1 registers that control
operation of a kernel running in EL1 and EL1 registers that only control
userspace execution in EL0.  Since we can defer saving/restoring the
latter, move them into their own function.

ACTLR_EL1 is not used by a VHE host, so we can move this register into
the EL1 state which is not saved/restored for a VHE host.


Looking at D10.2.1 (ARM DDI 0487C.a), the statement regarding the use of 
ACTLR_EL1 seems to be less strong than what you state here. It looks 
like it would be possible to have hardware where ACTLR_EL1 would still 
have an effect on host EL0. I also read the comments on version 2 of 
this patch but I wasn't able to find what I am missing.


Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 24/40] KVM: arm64: Rewrite system register accessors to read/write functions

2018-02-19 Thread Julien Grall

Hi Christoffer,

On 15/02/18 21:03, Christoffer Dall wrote:

From: Christoffer Dall 

Currently we access the system registers array via the vcpu_sys_reg()
macro.  However, we are about to change the behavior to some times
modify the register file directly, so let's change this to two
primitives:

  * Accessor macros vcpu_write_sys_reg() and vcpu_read_sys_reg()
  * Direct array access macro __vcpu_sys_reg()

The first primitive should be used in places where the code needs to
access the currently loaded VCPU's state as observed by the guest.  For
example, when trapping on cache related registers, a write to a system
register should go directly to the VCPU version of the register.

The second primitive can be used in places where the VCPU is known to


"second primitive" is a bit confusing here. I count 3 primitives above: 
(vcpu_write_sys_reg(), vcpu_read_sys_reg() and __vcpu_sys_reg(). From 
the description, I would say to refer to the latter (i.e third one).



never be running (for example userspace access) or for registers which
are never context switched (for example all the PMU system registers).

This rewrites all users of vcpu_sys_regs to one of the two primitives
above.

No functional change.
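
A short usage sketch of the two kinds of accessor (illustrative fragment; the registers chosen are just examples):

           /* State as observed by a possibly running guest: use the accessors. */
           vcpu_write_sys_reg(vcpu, SCTLR_EL1, val);
           val = vcpu_read_sys_reg(vcpu, SCTLR_EL1);

           /*
            * VCPU known not to be running (e.g. userspace access), or a
            * register that is never context switched: the memory-backed
            * copy is sufficient.
            */
           __vcpu_sys_reg(vcpu, PMCR_EL0) = val;
           val = __vcpu_sys_reg(vcpu, PMCR_EL0);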

Signed-off-by: Christoffer Dall 


[...]


diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index f2a6f39aec87..68398bf7882f 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -287,7 +287,18 @@ struct kvm_vcpu_arch {
  };
  
  #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)

-#define vcpu_sys_reg(v,r)  ((v)->arch.ctxt.sys_regs[(r)])
+
+/*
+ * Only use __vcpu_sys_reg if you know you want the memory backed version of a
+ * register, and not the one most recently accessed by a runnning VCPU.  For


NIT: s/runnning/running/


+ * example, for userpace access or for system registers that are never context


NIT: s/userpace/userspace/


+ * switched, but only emulated.
+ */
+#define __vcpu_sys_reg(v,r)((v)->arch.ctxt.sys_regs[(r)])
+
+#define vcpu_read_sys_reg(v,r) __vcpu_sys_reg(v,r)
+#define vcpu_write_sys_reg(v,r,n)  do { __vcpu_sys_reg(v,r) = n; } while 
(0)
+
  /*
   * CP14 and CP15 live in the same array, as they are backed by the
   * same system registers.


[...]


diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index b48af790615e..a05d2c01c786 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c


[...]


@@ -817,10 +818,10 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, 
struct sys_reg_params *p,
return false;
}
  
-		vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval

-   & ARMV8_PMU_USERENR_MASK;
-   } else {
-   p->regval = vcpu_sys_reg(vcpu, PMUSERENR_EL0)
+   __vcpu_sys_reg(vcpu, PMUSERENR_EL0) =
+  p->regval & ARMV8_PMU_USERENR_MASK;
+   } else  {


NIT: There is a double space between else and {.


+   p->regval = __vcpu_sys_reg(vcpu, PMUSERENR_EL0)
& ARMV8_PMU_USERENR_MASK;
}
  


Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 25/40] KVM: arm64: Introduce framework for accessing deferred sysregs

2018-02-22 Thread Julien Grall

Hi Christoffer,

On 15/02/18 21:03, Christoffer Dall wrote:

We are about to defer saving and restoring some groups of system
registers to vcpu_put and vcpu_load on supported systems.  This means
that we need some infrastructure to access system registes which


NIT: s/registes/registers/


supports either accessing the memory backing of the register or directly
accessing the system registers, depending on the state of the system
when we access the register.

We do this by defining read/write accessor functions, which can handle
both "immediate" and "deferrable" system registers.  Immediate registers
are always saved/restored in the world-switch path, but deferrable
registers are only saved/restored in vcpu_put/vcpu_load when supported
and sysregs_loaded_on_cpu will be set in that case.

Note that we don't use the deferred mechanism yet in this patch, but only
introduce infrastructure.  This is to improve convenience of review in


NIT: double space after the period.


the subsequent patches where it is clear which registers become
deferred.
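
As I read the description, the resulting accessor has roughly the following shape (a sketch; the deferrable cases are only filled in by later patches):

   u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
   {
           if (!vcpu->arch.sysregs_loaded_on_cpu)
                   goto immediate_read;

           /*
            * Registers deferred to vcpu_load/vcpu_put are read straight
            * from the hardware while they are loaded.
            */
           switch (reg) {
           /* case FOO_EL1: return read_sysreg_s(...);  (added later) */
           }

   immediate_read:
           return __vcpu_sys_reg(vcpu, reg);
   }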

Signed-off-by: Christoffer Dall 
---

Notes:
 Changes since v3:
  - Changed to a switch-statement based approach to improve
readability.
 
 Changes since v2:

  - New patch (deferred register handling has been reworked)

  arch/arm64/include/asm/kvm_host.h |  8 ++--
  arch/arm64/kvm/sys_regs.c | 33 +
  2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 68398bf7882f..b463b5e28959 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -284,6 +284,10 @@ struct kvm_vcpu_arch {
  
  	/* Virtual SError ESR to restore when HCR_EL2.VSE is set */

u64 vsesr_el2;
+
+   /* True when deferrable sysregs are loaded on the physical CPU,
+* see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */


NIT: I think the preferred style comment is
/*
 * Foo
 */

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 28/40] KVM: arm64: Defer saving/restoring 64-bit sysregs to vcpu load/put on VHE

2018-02-22 Thread Julien Grall

Hi Christoffer,

On 15/02/18 21:03, Christoffer Dall wrote:

Some system registers do not affect the host kernel's execution and can
therefore be loaded when we are about to run a VCPU and we don't have to
restore the host state to the hardware before the time when we are
actually about to return to userspace or schedule out the VCPU thread.

The EL1 system registers and the userspace state registers only
affecting EL0 execution do not need to be saved and restored on every
switch between the VM and the host, because they don't affect the host
kernel's execution.

We mark all registers which are now deffered as such in the


NIT: s/deffered/deferred/ I think.


vcpu_{read,write}_sys_reg accessors in sys-regs.c to ensure the most
up-to-date copy is always accessed.

Note MPIDR_EL1 (controlled via VMPIDR_EL2) is accessed from other vcpu
threads, for example via the GIC emulation, and therefore must be
declared as immediate, which is fine as the guest cannot modify this
value.

The 32-bit sysregs can also be deferred but we do this in a separate
patch as it requires a bit more infrastructure.



[...]


diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index b3c3f014aa61..f060309337aa 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -87,6 +87,26 @@ u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
 * exit from the guest but are only saved on vcpu_put.
 */
switch (reg) {
+   case CSSELR_EL1:return read_sysreg_s(SYS_CSSELR_EL1);
+   case SCTLR_EL1: return read_sysreg_s(sctlr_EL12);
+   case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1);
+   case CPACR_EL1: return read_sysreg_s(cpacr_EL12);
+   case TTBR0_EL1: return read_sysreg_s(ttbr0_EL12);
+   case TTBR1_EL1: return read_sysreg_s(ttbr1_EL12);
+   case TCR_EL1:   return read_sysreg_s(tcr_EL12);
+   case ESR_EL1:   return read_sysreg_s(esr_EL12);
+   case AFSR0_EL1: return read_sysreg_s(afsr0_EL12);
+   case AFSR1_EL1: return read_sysreg_s(afsr1_EL12);
+   case FAR_EL1:   return read_sysreg_s(far_EL12);
+   case MAIR_EL1:  return read_sysreg_s(mair_EL12);
+   case VBAR_EL1:  return read_sysreg_s(vbar_EL12);
+   case CONTEXTIDR_EL1:return read_sysreg_s(contextidr_EL12);
+   case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0);
+   case TPIDRRO_EL0:   return read_sysreg_s(SYS_TPIDRRO_EL0);


I find it a bit confusing to have some EL0 registers in the middle of 
the EL1 ones. Is it because they are listed by encoding?



+   case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1);
+   case AMAIR_EL1: return read_sysreg_s(amair_EL12);
+   case CNTKCTL_EL1:   return read_sysreg_s(cntkctl_EL12);
+   case PAR_EL1:   return read_sysreg_s(SYS_PAR_EL1);
}
  
  immediate_read:

@@ -103,6 +123,26 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, 
u64 val)
 * entry to the guest but are only restored on vcpu_load.
 */
switch (reg) {
+   case CSSELR_EL1:write_sysreg_s(val, SYS_CSSELR_EL1);return;
+   case SCTLR_EL1: write_sysreg_s(val, sctlr_EL12);return;
+   case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return;
+   case CPACR_EL1: write_sysreg_s(val, cpacr_EL12);return;
+   case TTBR0_EL1: write_sysreg_s(val, ttbr0_EL12);return;
+   case TTBR1_EL1: write_sysreg_s(val, ttbr1_EL12);return;
+   case TCR_EL1:   write_sysreg_s(val, tcr_EL12);  return;
+   case ESR_EL1:   write_sysreg_s(val, esr_EL12);  return;
+   case AFSR0_EL1: write_sysreg_s(val, afsr0_EL12);return;
+   case AFSR1_EL1: write_sysreg_s(val, afsr1_EL12);return;
+   case FAR_EL1:   write_sysreg_s(val, far_EL12);  return;
+   case MAIR_EL1:  write_sysreg_s(val, mair_EL12); return;
+   case VBAR_EL1:  write_sysreg_s(val, vbar_EL12); return;
+   case CONTEXTIDR_EL1:write_sysreg_s(val, contextidr_EL12);   return;
+   case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return;
+   case TPIDRRO_EL0:   write_sysreg_s(val, SYS_TPIDRRO_EL0);   return;
+   case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return;
+   case AMAIR_EL1: write_sysreg_s(val, amair_EL12);return;
+   case CNTKCTL_EL1:   write_sysreg_s(val, cntkctl_EL12);  return;
+   case PAR_EL1:   write_sysreg_s(val, SYS_PAR_EL1);   return;
}
  
  immediate_write:




Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 28/40] KVM: arm64: Defer saving/restoring 64-bit sysregs to vcpu load/put on VHE

2018-02-22 Thread Julien Grall



On 22/02/18 18:30, Julien Grall wrote:

Hi Christoffer,

On 15/02/18 21:03, Christoffer Dall wrote:

Some system registers do not affect the host kernel's execution and can
therefore be loaded when we are about to run a VCPU and we don't have to
restore the host state to the hardware before the time when we are
actually about to return to userspace or schedule out the VCPU thread.

The EL1 system registers and the userspace state registers only
affecting EL0 execution do not need to be saved and restored on every
switch between the VM and the host, because they don't affect the host
kernel's execution.

We mark all registers which are now deffered as such in the


NIT: s/deffered/deferred/ I think.


vcpu_{read,write}_sys_reg accessors in sys-regs.c to ensure the most
up-to-date copy is always accessed.

Note MPIDR_EL1 (controlled via VMPIDR_EL2) is accessed from other vcpu
threads, for example via the GIC emulation, and therefore must be
declared as immediate, which is fine as the guest cannot modify this
value.


I forgot to comment on this. I missed this paragraph on first read 
and was wondering why MPIDR_EL1 was not accessed via a sysreg access in 
vcpu_{read,write}_sys_reg. It might be worth adding a comment in 
those functions.
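
One way such a comment could look in vcpu_read_sys_reg() (purely illustrative):

           switch (reg) {
           /*
            * MPIDR_EL1 (backed by VMPIDR_EL2) is deliberately not listed
            * here: it can be read from other vcpu threads, e.g. by the GIC
            * emulation, and the guest cannot modify it, so the in-memory
            * copy is always used.
            */
           case CSSELR_EL1:        return read_sysreg_s(SYS_CSSELR_EL1);
           ...
           }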




The 32-bit sysregs can also be deferred but we do this in a separate
patch as it requires a bit more infrastructure.



[...]


diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index b3c3f014aa61..f060309337aa 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -87,6 +87,26 @@ u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
   * exit from the guest but are only saved on vcpu_put.
   */
  switch (reg) {
+    case CSSELR_EL1:    return read_sysreg_s(SYS_CSSELR_EL1);
+    case SCTLR_EL1:    return read_sysreg_s(sctlr_EL12);
+    case ACTLR_EL1:    return read_sysreg_s(SYS_ACTLR_EL1);
+    case CPACR_EL1:    return read_sysreg_s(cpacr_EL12);
+    case TTBR0_EL1:    return read_sysreg_s(ttbr0_EL12);
+    case TTBR1_EL1:    return read_sysreg_s(ttbr1_EL12);
+    case TCR_EL1:    return read_sysreg_s(tcr_EL12);
+    case ESR_EL1:    return read_sysreg_s(esr_EL12);
+    case AFSR0_EL1:    return read_sysreg_s(afsr0_EL12);
+    case AFSR1_EL1:    return read_sysreg_s(afsr1_EL12);
+    case FAR_EL1:    return read_sysreg_s(far_EL12);
+    case MAIR_EL1:    return read_sysreg_s(mair_EL12);
+    case VBAR_EL1:    return read_sysreg_s(vbar_EL12);
+    case CONTEXTIDR_EL1:    return read_sysreg_s(contextidr_EL12);
+    case TPIDR_EL0:    return read_sysreg_s(SYS_TPIDR_EL0);
+    case TPIDRRO_EL0:    return read_sysreg_s(SYS_TPIDRRO_EL0);


I find it a bit confusing to have some EL0 registers in the middle of 
the EL1 ones. Is it because they are listed by encoding?



+    case TPIDR_EL1:    return read_sysreg_s(SYS_TPIDR_EL1);
+    case AMAIR_EL1:    return read_sysreg_s(amair_EL12);
+    case CNTKCTL_EL1:    return read_sysreg_s(cntkctl_EL12);
+    case PAR_EL1:    return read_sysreg_s(SYS_PAR_EL1);
  }
  immediate_read:
@@ -103,6 +123,26 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, 
int reg, u64 val)

   * entry to the guest but are only restored on vcpu_load.
   */
  switch (reg) {
+    case CSSELR_EL1:    write_sysreg_s(val, SYS_CSSELR_EL1);    return;
+    case SCTLR_EL1:    write_sysreg_s(val, sctlr_EL12);    return;
+    case ACTLR_EL1:    write_sysreg_s(val, SYS_ACTLR_EL1);
return;

+    case CPACR_EL1:    write_sysreg_s(val, cpacr_EL12);    return;
+    case TTBR0_EL1:    write_sysreg_s(val, ttbr0_EL12);    return;
+    case TTBR1_EL1:    write_sysreg_s(val, ttbr1_EL12);    return;
+    case TCR_EL1:    write_sysreg_s(val, tcr_EL12);    return;
+    case ESR_EL1:    write_sysreg_s(val, esr_EL12);    return;
+    case AFSR0_EL1:    write_sysreg_s(val, afsr0_EL12);    return;
+    case AFSR1_EL1:    write_sysreg_s(val, afsr1_EL12);    return;
+    case FAR_EL1:    write_sysreg_s(val, far_EL12);    return;
+    case MAIR_EL1:    write_sysreg_s(val, mair_EL12);    return;
+    case VBAR_EL1:    write_sysreg_s(val, vbar_EL12);    return;
+    case CONTEXTIDR_EL1:    write_sysreg_s(val, contextidr_EL12);
return;
+    case TPIDR_EL0:    write_sysreg_s(val, SYS_TPIDR_EL0);
return;
+    case TPIDRRO_EL0:    write_sysreg_s(val, SYS_TPIDRRO_EL0);
return;
+    case TPIDR_EL1:    write_sysreg_s(val, SYS_TPIDR_EL1);
return;

+    case AMAIR_EL1:    write_sysreg_s(val, amair_EL12);    return;
+    case CNTKCTL_EL1:    write_sysreg_s(val, cntkctl_EL12);    return;
+    case PAR_EL1:    write_sysreg_s(val, SYS_PAR_EL1);    return;
  }
  immediate_write:



Cheers,



--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 29/40] KVM: arm64: Prepare to handle deferred save/restore of 32-bit registers

2018-02-23 Thread Julien Grall
nt spsr_idx = vcpu_spsr32_mode(vcpu);
+
+   if (!vcpu->arch.sysregs_loaded_on_cpu)
+   return vcpu_gp_regs(vcpu)->spsr[spsr_idx];
+
+   switch (spsr_idx) {
+   case KVM_SPSR_SVC:
+   return read_sysreg_el1(spsr);
+   case KVM_SPSR_ABT:
+   return read_sysreg(spsr_abt);
+   case KVM_SPSR_UND:
+   return read_sysreg(spsr_und);
+   case KVM_SPSR_IRQ:
+   return read_sysreg(spsr_irq);
+   case KVM_SPSR_FIQ:
+   return read_sysreg(spsr_fiq);
default:
BUG();
}
+}
+
+void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v)
+{
+   int spsr_idx = vcpu_spsr32_mode(vcpu);
+
+   if (!vcpu->arch.sysregs_loaded_on_cpu) {
+   vcpu_gp_regs(vcpu)->spsr[spsr_idx] = v;
+   return;
+   }
  
-	return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode];

+   switch (spsr_idx) {
+   case KVM_SPSR_SVC:
+   write_sysreg_el1(v, spsr);
+   case KVM_SPSR_ABT:
+   write_sysreg(v, spsr_abt);
+   case KVM_SPSR_UND:
+   write_sysreg(v, spsr_und);
+   case KVM_SPSR_IRQ:
+   write_sysreg(v, spsr_irq);
+   case KVM_SPSR_FIQ:
+   write_sysreg(v, spsr_fiq);
+   }
  }
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index f060309337aa..d2324560c9f5 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -107,6 +107,9 @@ u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
case AMAIR_EL1: return read_sysreg_s(amair_EL12);
case CNTKCTL_EL1:   return read_sysreg_s(cntkctl_EL12);
case PAR_EL1:   return read_sysreg_s(SYS_PAR_EL1);
+   case DACR32_EL2:return read_sysreg_s(SYS_DACR32_EL2);
+   case IFSR32_EL2:return read_sysreg_s(SYS_IFSR32_EL2);
+   case DBGVCR32_EL2:  return read_sysreg_s(SYS_DBGVCR32_EL2);
}
  
  immediate_read:

@@ -143,6 +146,9 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, int reg, u64 
val)
case AMAIR_EL1: write_sysreg_s(val, amair_EL12);return;
case CNTKCTL_EL1:   write_sysreg_s(val, cntkctl_EL12);  return;
case PAR_EL1:   write_sysreg_s(val, SYS_PAR_EL1);   return;
+   case DACR32_EL2:write_sysreg_s(val, SYS_DACR32_EL2);return;
+   case IFSR32_EL2:write_sysreg_s(val, SYS_IFSR32_EL2);return;
+   case DBGVCR32_EL2:  write_sysreg_s(val, SYS_DBGVCR32_EL2);  return;
    }
  
  immediate_write:




Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 31/40] KVM: arm64: Move common VHE/non-VHE trap config in separate functions

2018-02-23 Thread Julien Grall

Hi Christoffer,

On 15/02/18 21:03, Christoffer Dall wrote:

@@ -85,37 +123,14 @@ static void __hyp_text __activate_traps(struct kvm_vcpu 
*vcpu)
  {
u64 hcr = vcpu->arch.hcr_el2;
  
-	/*

-* We are about to set CPTR_EL2.TFP to trap all floating point
-* register accesses to EL2, however, the ARM ARM clearly states that
-* traps are only taken to EL2 if the operation would not otherwise
-* trap to EL1.  Therefore, always make sure that for 32-bit guests,
-* we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
-* If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
-* it will cause an exception.
-*/
-   if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
-   write_sysreg(1 << 30, fpexc32_el2);
-   isb();
-   }
+   write_sysreg(hcr, hcr_el2);
  
  	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))

write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
  
-	write_sysreg(hcr, hcr_el2);


OOI, any reason to move the write to HCR_EL2 just before the if?


-
-   /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
-   write_sysreg(1 << 15, hstr_el2);
-   /*
-* Make sure we trap PMU access from EL0 to EL2. Also sanitize
-* PMSELR_EL0 to make sure it never contains the cycle
-* counter, which could make a PMXEVCNTR_EL0 access UNDEF at
-* EL1 instead of being trapped to EL2.
-*/
-   write_sysreg(0, pmselr_el0);
-   write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
-   write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
-   __activate_traps_arch()();
+   __activate_traps_fpsimd32(vcpu);
+   __activate_traps_common(vcpu);
+   __activate_traps_arch()(vcpu);
  }
  
  static void __hyp_text __deactivate_traps_vhe(void)

@@ -160,9 +175,8 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu 
*vcpu)
if (vcpu->arch.hcr_el2 & HCR_VSE)
vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
  
+	__deactivate_traps_common();

__deactivate_traps_arch()();
-   write_sysreg(0, hstr_el2);
-   write_sysreg(0, pmuserenr_el0);
  }
  
  static void __hyp_text __activate_vm(struct kvm *kvm)
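
For reference, the common helper that the deleted lines above are folded into would contain roughly the following (a sketch reconstructed from the removed lines; see the actual patch for the exact split):

   static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu)
   {
           /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
           write_sysreg(1 << 15, hstr_el2);
           /*
            * Make sure we trap PMU access from EL0 to EL2. Also sanitize
            * PMSELR_EL0 to make sure it never contains the cycle counter,
            * which could make a PMXEVCNTR_EL0 access UNDEF at EL1 instead
            * of being trapped to EL2.
            */
           write_sysreg(0, pmselr_el0);
           write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
           write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
   }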




Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 35/40] KVM: arm/arm64: Get rid of vgic_elrsr

2018-02-23 Thread Julien Grall

Hi Christoffer,

On 15/02/18 21:03, Christoffer Dall wrote:

There is really no need to store the vgic_elrsr on the VGIC data
structures as the only need we have for the elrsr is to figure out if an
LR is inactive when we save the VGIC state upon returning from the
guest.  We might as well store this in a temporary local variable.

This also gets rid of the endianness conversion in the VGIC save
function, which is completely unnecessary and would actually result in
incorrect functionality on big-endian systems, because we are only using
typed values here and not converting pointers and reading different
types here.


I can't find any endianness code being removed in this patch. What did I miss?
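
For context, the GICv2 save path the commit message refers to ends up reading the ELRSR into a local variable and only using it to decide whether an LR needs to be read back, roughly like the sketch below (a reconstruction from the commit message, not the literal patch; the function name is made up):

   static void example_save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
   {
           struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
           u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
           u64 elrsr;
           int i;

           /* Local snapshot of "which LRs are empty"; nothing is stored. */
           elrsr = readl_relaxed(base + GICH_ELRSR0);
           if (unlikely(used_lrs > 32))
                   elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32;

           for (i = 0; i < used_lrs; i++) {
                   if (!(elrsr & (1UL << i)))
                           cpu_if->vgic_lr[i] =
                                   readl_relaxed(base + GICH_LR0 + (i * 4));
           }
   }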

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 03/40] KVM: arm64: Avoid storing the vcpu pointer on the stack

2018-03-05 Thread Julien Grall

Hi Christoffer,

On 27/02/18 11:33, Christoffer Dall wrote:

From: Christoffer Dall 

We already have the percpu area for the host cpu state, which points to
the VCPU, so there's no need to store the VCPU pointer on the stack on
every context switch.  We can be a little more clever and just use
tpidr_el2 for the percpu offset and load the VCPU pointer from the host
context.

This has the benefit of being able to retrieve the host context even
when our stack is corrupted, and it has a potential performance benefit
because we trade a store plus a load for an mrs and a load on a round
trip to the guest.

This does require us to calculate the percpu offset without including
the offset from the kernel mapping of the percpu array to the linear
mapping of the array (which is what we store in tpidr_el1), because a
PC-relative generated address in EL2 is already giving us the hyp alias
of the linear mapping of a kernel address.  We do this in
__cpu_init_hyp_mode() by using kvm_ksym_ref().

The code that accesses ESR_EL2 was previously using an alternative to
use the _EL1 accessor on VHE systems, but this was actually unnecessary
as the _EL1 accessor aliases the ESR_EL2 register on VHE, and the _EL2
accessor does the same thing on both systems.

Cc: Ard Biesheuvel 
Reviewed-by: Marc Zyngier 
Reviewed-by: Andrew Jones 
Signed-off-by: Christoffer Dall 


Reviewed-by: Julien Grall 

Cheers,


---

Notes:
 Changes since v4:
  - Clarified rationale in commit message.
  - Called get_host_ctxt from get_vcpu and rename get_vcpu to
get_vcpu_ptr.
 
 Changes since v3:

  - Reworked the assembly part of the patch after rebasing on v4.16-rc1
which created a conflict with the variant 2 mitigations.
  - Removed Marc's reviewed-by due to the rework.
  - Removed unneeded extern keyword in declaration in header file
 
 Changes since v1:

  - Use PC-relative addressing to access per-cpu variables instead of
using a load from the literal pool.
  - Remove stale comments as pointed out by Marc
  - Reworded the commit message as suggested by Drew

  arch/arm64/include/asm/kvm_asm.h  | 15 +++
  arch/arm64/include/asm/kvm_host.h | 15 +++
  arch/arm64/kernel/asm-offsets.c   |  1 +
  arch/arm64/kvm/hyp/entry.S|  6 +-
  arch/arm64/kvm/hyp/hyp-entry.S| 28 ++--
  arch/arm64/kvm/hyp/switch.c   |  5 +
  arch/arm64/kvm/hyp/sysreg-sr.c|  5 +
  7 files changed, 48 insertions(+), 27 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 24961b732e65..7149f1520382 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -33,6 +33,7 @@
  #define KVM_ARM64_DEBUG_DIRTY_SHIFT   0
  #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
  
+/* Translate a kernel address of @sym into its equivalent linear mapping */

  #define kvm_ksym_ref(sym) \
({  \
void *val = &sym;   \
@@ -70,6 +71,20 @@ extern u32 __init_stage2_translation(void);
  
  extern void __qcom_hyp_sanitize_btac_predictors(void);
  
+#else /* __ASSEMBLY__ */

+
+.macro get_host_ctxt reg, tmp
+   adr_l   \reg, kvm_host_cpu_state
+   mrs \tmp, tpidr_el2
+   add \reg, \reg, \tmp
+.endm
+
+.macro get_vcpu_ptr vcpu, ctxt
+   get_host_ctxt \ctxt, \vcpu
+   ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
+   kern_hyp_va \vcpu
+.endm
+
  #endif
  
  #endif /* __ARM_KVM_ASM_H__ */

diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 596f8e414a4c..618cfee7206a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -358,10 +358,15 @@ int kvm_perf_teardown(void);
  
  struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
  
+void __kvm_set_tpidr_el2(u64 tpidr_el2);

+DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
+
  static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
   unsigned long hyp_stack_ptr,
   unsigned long vector_ptr)
  {
+   u64 tpidr_el2;
+
/*
 * Call initialization code, and switch to the full blown HYP code.
 * If the cpucaps haven't been finalized yet, something has gone very
@@ -370,6 +375,16 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 */
BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
+
+   /*
+* Calculate the raw per-cpu offset without a translation from the
+* kernel's mapping to the linear mapping, and store it in tpidr_el2
+* so that we can use adr_l 

Re: [PATCH v5 17/40] KVM: arm64: Move userspace system registers into separate function

2018-03-05 Thread Julien Grall

Hi Christoffer,

On 27/02/18 11:34, Christoffer Dall wrote:

From: Christoffer Dall 

There's a semantic difference between the EL1 registers that control
operation of a kernel running in EL1 and EL1 registers that only control
userspace execution in EL0.  Since we can defer saving/restoring the
latter, move them into their own function.

The ARMv8 ARM (ARM DDI 0487C.a) Section D10.2.1 recommends that
ACTLR_EL1 has no effect on the processor when running the VHE host, and
we can therefore move this register into the EL1 state which is only
saved/restored on vcpu_put/load for a VHE host.

We also take this chance to rename the function saving/restoring the
remaining system register to make it clear this function deals with
the EL1 system registers.

Reviewed-by: Andrew Jones 
Reviewed-by: Marc Zyngier 
Signed-off-by: Christoffer Dall 


Reviewed-by: Julien Grall 

Cheers,


---

Notes:
 Changes since v4:
  - Clarified rationale for deferring ACTLR_EL1 in the commit message.
 
 Changes since v3:

  - Correct the comment about ACTLR_EL1 and adjust commit text.
 
 Changes since v2:

  - Save restore ACTLR_EL1 as part of the EL1 registers state instead of
the user register state, as ACTLR_EL1 can't affect the host's execution
on VHE systems.
 
 Changes since v1:

  - Added comment about sp_el0 to common save sysreg save/restore functions

  arch/arm64/kvm/hyp/sysreg-sr.c | 48 ++
  1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 99fc60516103..d5a5145b4e7c 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -28,24 +28,33 @@ static void __hyp_text __sysreg_do_nothing(struct 
kvm_cpu_context *ctxt) { }
  /*
   * Non-VHE: Both host and guest must save everything.
   *
- * VHE: Host must save tpidr*_el0, actlr_el1, mdscr_el1, sp_el0,
+ * VHE: Host must save tpidr*_el0, mdscr_el1, sp_el0,
   * and guest must save everything.
   */
  
  static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)

  {
-   ctxt->sys_regs[ACTLR_EL1]= read_sysreg(actlr_el1);
-   ctxt->sys_regs[TPIDR_EL0]= read_sysreg(tpidr_el0);
-   ctxt->sys_regs[TPIDRRO_EL0]  = read_sysreg(tpidrro_el0);
ctxt->sys_regs[MDSCR_EL1]= read_sysreg(mdscr_el1);
+
+   /*
+* The host arm64 Linux uses sp_el0 to point to 'current' and it must
+* therefore be saved/restored on every entry/exit to/from the guest.
+*/
ctxt->gp_regs.regs.sp= read_sysreg(sp_el0);
  }
  
-static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)

+static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
+{
+   ctxt->sys_regs[TPIDR_EL0]= read_sysreg(tpidr_el0);
+   ctxt->sys_regs[TPIDRRO_EL0]  = read_sysreg(tpidrro_el0);
+}
+
+static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
  {
ctxt->sys_regs[MPIDR_EL1]= read_sysreg(vmpidr_el2);
ctxt->sys_regs[CSSELR_EL1]   = read_sysreg(csselr_el1);
ctxt->sys_regs[SCTLR_EL1]= read_sysreg_el1(sctlr);
+   ctxt->sys_regs[ACTLR_EL1]= read_sysreg(actlr_el1);
ctxt->sys_regs[CPACR_EL1]= read_sysreg_el1(cpacr);
ctxt->sys_regs[TTBR0_EL1]= read_sysreg_el1(ttbr0);
ctxt->sys_regs[TTBR1_EL1]= read_sysreg_el1(ttbr1);
@@ -73,35 +82,46 @@ static void __hyp_text __sysreg_save_state(struct 
kvm_cpu_context *ctxt)
  }
  
  static hyp_alternate_select(__sysreg_call_save_host_state,

-   __sysreg_save_state, __sysreg_do_nothing,
+   __sysreg_save_el1_state, __sysreg_do_nothing,
ARM64_HAS_VIRT_HOST_EXTN);
  
  void __hyp_text __sysreg_save_host_state(struct kvm_cpu_context *ctxt)

  {
__sysreg_call_save_host_state()(ctxt);
__sysreg_save_common_state(ctxt);
+   __sysreg_save_user_state(ctxt);
  }
  
  void __hyp_text __sysreg_save_guest_state(struct kvm_cpu_context *ctxt)

  {
-   __sysreg_save_state(ctxt);
+   __sysreg_save_el1_state(ctxt);
__sysreg_save_common_state(ctxt);
+   __sysreg_save_user_state(ctxt);
  }
  
  static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)

  {
-   write_sysreg(ctxt->sys_regs[ACTLR_EL1],actlr_el1);
-   write_sysreg(ctxt->sys_regs[TPIDR_EL0],tpidr_el0);
-   write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
write_sysreg(ctxt->sys_regs[MDSCR_EL1],mdscr_el1);
+
+   /*
+* The host arm64 Linux uses sp_el0 to point to 'current' and it must
+* therefore be saved/restored on every entry/exit to/from the guest.
+*/
write_sysreg(ctxt->gp_regs.regs.sp,sp_el0);
  }
 

Re: [PATCH v4 3/4] irqchip: GIC: Convert to EOImode == 1

2015-09-10 Thread Julien Grall
> + .irq_eoi= gic_eoimode1_eoi_irq,
> + .irq_set_type   = gic_set_type,
> +#ifdef CONFIG_SMP
> + .irq_set_affinity   = gic_set_affinity,
> +#endif
> + .irq_get_irqchip_state  = gic_irq_get_irqchip_state,
> + .irq_set_irqchip_state  = gic_irq_set_irqchip_state,
> + .flags  = IRQCHIP_SET_TYPE_MASKED,
> +};
> +
>  void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
>  {
>   if (gic_nr >= MAX_GIC_NR)
> @@ -359,6 +390,10 @@ static void gic_cpu_if_up(void)
>  {
>   void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]);
>   u32 bypass = 0;
> + u32 mode = 0;
> +
> + if (static_key_true(&supports_deactivate))
> + mode = GIC_CPU_CTRL_EOImodeNS;
>  
>   /*
>   * Preserve bypass disable bits to be written back later
> @@ -366,7 +401,7 @@ static void gic_cpu_if_up(void)
>   bypass = readl(cpu_base + GIC_CPU_CTRL);
>   bypass &= GICC_DIS_BYPASS_MASK;
>  
> - writel_relaxed(bypass | GICC_ENABLE, cpu_base + GIC_CPU_CTRL);
> + writel_relaxed(bypass | mode | GICC_ENABLE, cpu_base + GIC_CPU_CTRL);
>  }
>  
>  
> @@ -789,13 +824,20 @@ void __init gic_init_physaddr(struct device_node *node)
>  static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
>   irq_hw_number_t hw)
>  {
> + struct irq_chip *chip = &gic_chip;
> +
> + if (static_key_true(&supports_deactivate)) {
> + if (d->host_data == (void *)&gic_data[0])
> + chip = &gic_eoimode1_chip;
> + }
> +
>   if (hw < 32) {
>   irq_set_percpu_devid(irq);
> - irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
> + irq_domain_set_info(d, irq, hw, chip, d->host_data,
>   handle_percpu_devid_irq, NULL, NULL);
>   set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN);
>   } else {
> - irq_domain_set_info(d, irq, hw, &gic_chip, d->host_data,
> + irq_domain_set_info(d, irq, hw, chip, d->host_data,
>   handle_fasteoi_irq, NULL, NULL);
>   set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
>   }
> @@ -986,6 +1028,8 @@ void __init gic_init_bases(unsigned int gic_nr, int 
> irq_start,
>   register_cpu_notifier(&gic_cpu_notifier);
>  #endif
>   set_handle_irq(gic_handle_irq);
> + if (static_key_true(&supports_deactivate))
> + pr_info("GIC: Using split EOI/Deactivate mode\n");
>   }
>  
>   gic_dist_init(gic);
> @@ -1001,6 +1045,7 @@ gic_of_init(struct device_node *node, struct 
> device_node *parent)
>  {
>   void __iomem *cpu_base;
>   void __iomem *dist_base;
> + struct resource cpu_res;
>   u32 percpu_offset;
>   int irq;
>  
> @@ -1013,6 +1058,16 @@ gic_of_init(struct device_node *node, struct 
> device_node *parent)
>   cpu_base = of_iomap(node, 1);
>   WARN(!cpu_base, "unable to map gic cpu registers\n");
>  
> + of_address_to_resource(node, 1, &cpu_res);
> +
> + /*
> +  * Disable split EOI/Deactivate if either HYP is not available
> +  * or the CPU interface is too small.
> +  */
> + if (gic_cnt == 0 && (!is_hyp_mode_available() ||
> +  resource_size(&cpu_res) < SZ_8K))
> + static_key_slow_dec(&supports_deactivate);
> +
>   if (of_property_read_u32(node, "cpu-offset", &percpu_offset))
>   percpu_offset = 0;
>  
> @@ -1132,6 +1187,14 @@ gic_v2_acpi_init(struct acpi_table_header *table)
>   }
>  
>   /*
> +  * Disable split EOI/Deactivate if HYP is not available. ACPI
> +  * guarantees that we'll always have a GICv2, so the CPU
> +  * interface will always be the right size.
> +  */
> + if (!is_hyp_mode_available())
> + static_key_slow_dec(&supports_deactivate);
> +
> + /*
>* Initialize zero GIC instance (no multi-GIC support). Also, set GIC
>* as default IRQ domain to allow for GSI registration and GSI to IRQ
>* number translation (see acpi_register_gsi() and acpi_gsi_to_irq()).
> diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
> index 9de976b..b1533c0 100644
> --- a/include/linux/irqchip/arm-gic.h
> +++ b/include/linux/irqchip/arm-gic.h
> @@ -20,9 +20,13 @@
>  #define GIC_CPU_ALIAS_BINPOINT   0x1c
>  #define GIC_CPU_ACTIVEPRIO   0xd0
>  #define GIC_CPU_IDENT0xfc
> +#define GIC_CPU_DEACTIVATE   0x1000
>  
>  #define GICC_ENABLE  0x1
>  #define GICC_INT_PRI_THRESHOLD   0xf0
> +
> +#define GIC_CPU_CTRL_EOImodeNS   (1 << 9)
> +
>  #define GICC_IAR_INT_ID_MASK 0x3ff
>  #define GICC_INT_SPURIOUS1023
>  #define GICC_DIS_BYPASS_MASK 0x1e0
> 


-- 
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 3/4] irqchip: GIC: Convert to EOImode == 1

2015-09-10 Thread Julien Grall
On 10/09/15 10:54, Marc Zyngier wrote:
> Hi Julian,

Hi Marc,

> On 09/09/15 20:23, Julien Grall wrote:
>> Hi,
>>
>> I've been trying the latest linus/master (a794b4f), which includes this
>> patch, as a baremetal kernel on X-gene. This is failing on early boot
>> without much log.
>>
>> After bisecting the tree, I found the error coming from this patch.
>> While this patch is valid, it made me remember that X-Gene (at least
>> the first version) has an odd GICv2.
>>
>> The GICC is divided in 2 areas of 4K, each one aligned at a 64KB address.
>> This means that the address of GICC_DIR won't be 0x1000 but 0x10000.
> 
> Not really. I already mentioned that one a while ago:
> 
> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-March/332249.html

Sorry I haven't seen this thread on the ML.

> The first page of GIC is aliased over the first 64kB, and the second 
> page aliased over the second 64kB. So you get a consistent mapping if 
> you use (base + 0xF000) to address GICC. Also, the DT that's in 
> mainline is showing a 4kB CPU interface, which doesn't enable 
> EOImode==1.
> You must be using a firmware that's newer than mine, since 
> I'm perfectly able to boot my Mustang with these patches.

My U-boot firmware is:

U-Boot 2013.04-mustang_sw_1.15.12 (May 20 2015 - 10:03:33)

The interrupt controller node looks like:

interrupt-controller@7809 {
reg = <0x0 0x7809 0x0 0x1 0x0 0x780a 0x0
0x2 0x0 0x780c 0x0 0x1 0x0 0x780e 0x0 0x2>;
interrupts = <0x1 0x9 0xf04>;
compatible = "arm,cortex-a15-gic";
#interrupt-cells = <0x3>;
phandle = <0x1>;
interrupt-controller;
linux,phandle = <0x1>;
};

Note that we have a recent firmware which corrects the GICD region to use
the non-secure one rather than the secure one. See [1] for more details.

> 
>> We had the same issue on Xen when we did the first port of X-gene [1].
>> Although, we choose to add a quirk in Xen for this platform in order to
>> map contiguously in the virtual memory the 2 part of GICC.
>>
>> Note that, back then, Ian suggested to extend the bindings to support a
>> such platform [2]. AFAICT, there was no follow-up on it.
> 
> The main problem here is not to update the binding, but the fact that 
> you *cannot* update the DT on x-gene (the firmware will replace your 
> GIC node with what it thinks it is), and the APM guys can't be bothered 
> to fix their stuff.
> 
> In the meantime, can you give the following patch a shot? My Mustang is 
> wired to a 4kB CPU interface, so I'll need your help to test it.

I applied the two patches on top of linus/master and I'm able to boot
correctly on X-gene. Thank you!
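
(For reference, the aliasing trick Marc describes above boils down to
something like the following -- an illustrative sketch, not the actual
patch that was tested:)

	/*
	 * Sketch: the first 4kB GICC page is aliased over the first 64kB
	 * and the second page over the next 64kB.  Mapping 8kB from
	 * base + 0xf000 keeps offsets 0x0-0xfff on the first page (via its
	 * alias) while offset 0x1000 (GIC_CPU_DEACTIVATE) lands on
	 * base + 0x10000, i.e. the second page.
	 */
	if (resource_size(&cpu_res) == SZ_128K) {
		cpu_base = ioremap(cpu_res.start + 0xf000, SZ_8K);
		WARN(!cpu_base, "unable to map aliased gic cpu registers\n");
	}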

Regards,

[1] http://lists.xen.org/archives/html/xen-devel/2015-04/msg02816.html

-- 
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 3/4] irqchip: GIC: Convert to EOImode == 1

2015-09-10 Thread Julien Grall
On 10/09/15 17:30, Marc Zyngier wrote:
> On 10/09/15 17:23, Julien Grall wrote:
>> On 10/09/15 10:54, Marc Zyngier wrote:
> 
> [...]
> 
>>> In the meantime, can you give the following patch a shot? My Mustang is 
>>> wired to a 4kB CPU interface, so I'll need your help to test it.
>>
>> I applied the two patches on top of linus/master and I'm able to boot
>> correctly on X-gene. Thank you!
> 
> Thanks for testing. Can I put your Tested-by tag on the patch when I
> send it to Thomas?

Sure:

Tested-by: Julien Grall 

Regards,

-- 
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 3/4] irqchip: GIC: Convert to EOImode == 1

2015-09-11 Thread Julien Grall



On 11/09/2015 11:54, Ian Campbell wrote:

On Thu, 2015-09-10 at 17:23 +0100, Julien Grall wrote:

I applied the two patches on top of linus/master and I'm able to boot
correctly on X-gene. Thank you!


Perhaps we should replicate this approach in Xen and get rid of
  PLATFORM_QUIRK_GIC_64K_STRIDE?


I was thinking of doing it. But I wasn't sure whether such an "ugly" patch
was worth it compared to the quirk.


Regards,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 3/4] irqchip: GIC: Convert to EOImode == 1

2015-09-11 Thread Julien Grall
On 11/09/15 12:09, Marc Zyngier wrote:
> On 11/09/15 11:59, Julien Grall wrote:
>>
>>
>> On 11/09/2015 11:54, Ian Campbell wrote:
>>> On Thu, 2015-09-10 at 17:23 +0100, Julien Grall wrote:
>>>> I applied the two patches on top of linus/master and I'm able to boot
>>>> correctly on X-gene. Thank you!
>>>
>>> Perhaps we should replicate this approach in Xen and get rid of
>>>   PLATFORM_QUIRK_GIC_64K_STRIDE?
>>
>> I was thinking to do it. But, I wasn't sure if it was worth to get a 
>> such "ugly" patch compare to the quirk.
> 
> It is not a quirk. It is actually recommended in the SBSA spec. The
> patch is ugly because we can't do the right thing on the one platform
> that actually implemented ARM's own recommendation (we can't tell the
> bloody firmware to stop overriding our DT).
> 
> I would otherwise have added a "arm,use-sbsa-aliasing" property (or
> something similar) instead of trying to guess things...

I will give a look to port this patch on Xen.

Regards,

-- 
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 1/3] KVM: arm/arm64: vgic: Make vgic_irq->irq_lock a raw_spinlock

2019-02-01 Thread Julien Grall
 false;
irq->pending_latch = false;
irq->line_level = false;
-   spin_unlock_irqrestore(&irq->irq_lock, flags);
+   raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
  out:
vgic_put_irq(vcpu->kvm, irq);
  }
@@ -539,9 +539,9 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned 
int vintid)
irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
BUG_ON(!irq);
  
-	spin_lock_irqsave(&irq->irq_lock, flags);

+   raw_spin_lock_irqsave(&irq->irq_lock, flags);
kvm_vgic_unmap_irq(irq);
-   spin_unlock_irqrestore(&irq->irq_lock, flags);
+   raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
  
  	return 0;

@@ -571,12 +571,12 @@ int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned 
int intid, void *owner)
return -EINVAL;
  
  	irq = vgic_get_irq(vcpu->kvm, vcpu, intid);

-   spin_lock_irqsave(&irq->irq_lock, flags);
+   raw_spin_lock_irqsave(&irq->irq_lock, flags);
if (irq->owner && irq->owner != owner)
ret = -EEXIST;
else
irq->owner = owner;
-   spin_unlock_irqrestore(&irq->irq_lock, flags);
+   raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
  
  	return ret;

  }
@@ -603,7 +603,7 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
bool target_vcpu_needs_kick = false;
  
-		spin_lock(&irq->irq_lock);

+   raw_spin_lock(&irq->irq_lock);
  
  		BUG_ON(vcpu != irq->vcpu);
  
@@ -616,7 +616,7 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)

 */
list_del(&irq->ap_list);
irq->vcpu = NULL;
-   spin_unlock(&irq->irq_lock);
+   raw_spin_unlock(&irq->irq_lock);
  
  			/*

 * This vgic_put_irq call matches the
@@ -631,13 +631,13 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
  
  		if (target_vcpu == vcpu) {

/* We're on the right CPU */
-   spin_unlock(&irq->irq_lock);
+   raw_spin_unlock(&irq->irq_lock);
continue;
}
  
  		/* This interrupt looks like it has to be migrated. */
  
-		spin_unlock(&irq->irq_lock);

+   raw_spin_unlock(&irq->irq_lock);
spin_unlock(&vgic_cpu->ap_list_lock);
  
  		/*

@@ -655,7 +655,7 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
 SINGLE_DEPTH_NESTING);
-   spin_lock(&irq->irq_lock);
+   raw_spin_lock(&irq->irq_lock);
  
  		/*

 * If the affinity has been preserved, move the
@@ -675,7 +675,7 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
target_vcpu_needs_kick = true;
}
  
-		spin_unlock(&irq->irq_lock);

+   raw_spin_unlock(&irq->irq_lock);
spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);
  
@@ -741,10 +741,10 @@ static int compute_ap_list_depth(struct kvm_vcpu *vcpu,

list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
int w;
  
-		spin_lock(&irq->irq_lock);

+   raw_spin_lock(&irq->irq_lock);
/* GICv2 SGIs can count for more than one... */
w = vgic_irq_get_lr_count(irq);
-   spin_unlock(&irq->irq_lock);
+   raw_spin_unlock(&irq->irq_lock);
  
  		count += w;

*multi_sgi |= (w > 1);
@@ -770,7 +770,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
count = 0;
  
  	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {

-   spin_lock(&irq->irq_lock);
+   raw_spin_lock(&irq->irq_lock);
  
  		/*

 * If we have multi-SGIs in the pipeline, we need to
@@ -780,7 +780,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
 * the AP list has been sorted already.
     */
if (multi_sgi && irq->priority > prio) {
-   spin_unlock(&irq->irq_lock);
+   raw_spin_unlock(&irq->irq_lock);
break;
}
  
@@ -791,7 +791,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)

prio = irq->priority;
}
  
-		spin_unlock(&irq->irq_lock);

+   raw_spin_unlock(&irq->irq_lock);
  
  		if (count == kvm_vgic_global_state.nr_lr) {

if (!list_is_last(&irq->ap_list,
@@ -921,11 +921,11 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
  
  	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {

-   spin_lock(&irq->irq_lock);
+   raw_spin_lock(&irq->irq_lock);
pending = irq_is_pending(irq) && irq->enabled &&
  !irq->active &&
  irq->priority < vmcr.pmr;
-   spin_unlock(&irq->irq_lock);
+   raw_spin_unlock(&irq->irq_lock);
  
  		if (pending)

break;
@@ -963,11 +963,10 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, 
unsigned int vintid)
return false;
  
  	irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);

-   spin_lock_irqsave(&irq->irq_lock, flags);
+   raw_spin_lock_irqsave(&irq->irq_lock, flags);
map_is_active = irq->hw && irq->active;
-   spin_unlock_irqrestore(&irq->irq_lock, flags);
+   raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
vgic_put_irq(vcpu->kvm, irq);
  
  	return map_is_active;

  }
-



--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 1/3] KVM: arm/arm64: vgic: Make vgic_irq->irq_lock a raw_spinlock

2019-02-01 Thread Julien Grall

Hi Julia,

On 01/02/2019 17:36, Julia Cartwright wrote:

On Fri, Feb 01, 2019 at 03:30:58PM +, Julien Grall wrote:

Hi Julien,

On 07/01/2019 15:06, Julien Thierry wrote:

vgic_irq->irq_lock must always be taken with interrupts disabled as
it is used in interrupt context.


I am a bit confused by the reason here. The code mentions that ap_list_lock
could be taken from the timer interrupt handler. I assume it refers to the
handler kvm_arch_timer_handler. Looking at the configuration of the
interrupt, the flag IRQF_NO_THREAD is not set, so the interrupt should be
threaded when CONFIG_PREEMPT_FULL is set. If my understanding is correct,
this means the interrupt thread would sleep if it takes the spinlock.

Did I miss anything? Do you have an exact path where the vGIC is actually
called from interrupt context?


The part you're missing is that percpu interrupts are not force
threaded:

static int irq_setup_forced_threading(struct irqaction *new)
{
if (!force_irqthreads)
return 0;
if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
return 0;

/* ...*/
}


Thank you for the pointer! I think it would be worth mentioning in the commit 
message that per-cpu interrupts are not threaded.
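
(To make that concrete: the arch timer PPI is requested as a percpu
interrupt, so IRQF_PERCPU is set and irq_setup_forced_threading() above
bails out early. A sketch from memory of the request in
virt/kvm/arm/arch_timer.c, for illustration only:)

	/*
	 * Percpu IRQs are never force-threaded, so kvm_arch_timer_handler()
	 * (and any lock it takes) runs in hard interrupt context even with
	 * forced irq threading enabled.
	 */
	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
				 "kvm guest timer", kvm_get_running_vcpus());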


Best regards,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 06/14] KVM: arm/arm64: Factor out VMID into struct kvm_vmid

2019-02-21 Thread Julien Grall
Hi Christoffer,

On 24/01/2019 14:00, Christoffer Dall wrote:
> Note that to avoid mapping the kvm_vmid_bits variable into hyp, we
> simply forego the masking of the vmid value in kvm_get_vttbr and rely on
> update_vmid to always assign a valid vmid value (within the supported
> range).

[...]

> - kvm->arch.vmid = kvm_next_vmid;
> + vmid->vmid = kvm_next_vmid;
>   kvm_next_vmid++;
> - kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
> -
> - /* update vttbr to be used with the new vmid */
> - pgd_phys = virt_to_phys(kvm->arch.pgd);
> - BUG_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm));
> - vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & 
> VTTBR_VMID_MASK(kvm_vmid_bits);
> - kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp;
> + kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;

The arm64 version of kvm_get_vmid_bits does not look cheap. Indeed it
requires reading the sanitized value of SYS_ID_AA64MMFR1_EL1, which is
implemented using the function bsearch.

So wouldn't it be better to keep the kvm_vmid_bits variable for use in
update_vttbr()?
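
(Roughly -- from memory, not a verbatim quote of the headers -- the two
options look like this:)

/* What kvm_get_vmid_bits() does today: a sanitised-register read
 * (bsearch under the hood) on every call. */
static inline unsigned int kvm_get_vmid_bits(void)
{
	int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);

	return (cpuid_feature_extract_unsigned_field(reg,
			ID_AA64MMFR1_VMIDBITS_SHIFT) ==
		ID_AA64MMFR1_VMIDBITS_16) ? 16 : 8;
}

/* Versus keeping the value cached once at init time (sketch, with a
 * hypothetical init helper): */
static unsigned int kvm_vmid_bits __read_mostly;

static void kvm_cache_vmid_bits(void)
{
	kvm_vmid_bits = kvm_get_vmid_bits();
}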

Cheers,

-- 
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 01/26] KVM: Documentation: Document arm64 core registers in detail

2019-02-21 Thread Julien Grall

Hi Dave,

On 18/02/2019 19:52, Dave Martin wrote:

Since the sizes of individual members of the core arm64
registers vary, the list of register encodings that make sense is
not a simple linear sequence.

To clarify which encodings to use, this patch adds a brief list
to the documentation.

Signed-off-by: Dave Martin 


Reviewed-by: Julien Grall 

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 02/26] arm64: fpsimd: Always set TIF_FOREIGN_FPSTATE on task state flush

2019-02-21 Thread Julien Grall

Hi Dave,

On 18/02/2019 19:52, Dave Martin wrote:

This patch updates fpsimd_flush_task_state() to mirror the new
semantics of fpsimd_flush_cpu_state() introduced by commit
d8ad71fa38a9 ("arm64: fpsimd: Fix TIF_FOREIGN_FPSTATE after
invalidating cpu regs").  Both functions now implicitly set


NIT: Double-space before "Both"


TIF_FOREIGN_FPSTATE to indicate that the task's FPSIMD state is not
loaded into the cpu.

As a side-effect, fpsimd_flush_task_state() now sets
TIF_FOREIGN_FPSTATE even for non-running tasks.  In the case of


NIT: Double space before "In".


non-running tasks this is not useful but also harmless, because the
flag is live only while the corresponding task is running.  This
function is not called from fast paths, so special-casing this for
the task == current case is not really worth it.

Compiler barriers previously present in restore_sve_fpsimd_context()
are pulled into fpsimd_flush_task_state() so that it can be safely
called with preemption enabled if necessary.

Explicit calls to set TIF_FOREIGN_FPSTATE that accompany
fpsimd_flush_task_state() calls and are now redundant are removed
as appropriate.

fpsimd_flush_task_state() is used to get exclusive access to the
representation of the task's state via task_struct, for the purpose
of replacing the state.  Thus, the call to this function should


NIT: Double-space before "Thus".


happen before manipulating fpsimd_state or sve_state etc. in
task_struct.  Anomalous cases are reordered appropriately in order


NIT: Double-space before "Anomalous".


to make the code more consistent, although there should be no
functional difference since these cases are protected by
local_bh_disable() anyway.

Signed-off-by: Dave Martin 
Reviewed-by: Alex Bennée 


Reviewed-by: Julien Grall 

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 06/26] arm64/sve: Check SVE virtualisability

2019-02-21 Thread Julien Grall

Hi Dave,

On 18/02/2019 19:52, Dave Martin wrote:

+   /*
+* Mismatches above sve_max_virtualisable_vl are fine, since
+* no guest is allowed to configure ZCR_EL2.LEN to exceed this:
+*/
+   if (sve_vl_from_vq(bit_to_vq(b)) <= sve_max_virtualisable_vl) {
+   pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",
+   smp_processor_id());


Would it be worth printing the unsupported vector length?
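
(Something along these lines, purely as an illustration:)

		if (sve_vl_from_vq(bit_to_vq(b)) <= sve_max_virtualisable_vl) {
			/* Sketch: include the offending length in the warning */
			pr_warn("SVE: cpu%d: Unsupported vector length %u present\n",
				smp_processor_id(),
				sve_vl_from_vq(bit_to_vq(b)));
		}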

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 07/26] arm64/sve: Clarify role of the VQ map maintenance functions

2019-02-21 Thread Julien Grall

Hi Dave,

On 18/02/2019 19:52, Dave Martin wrote:

The roles of sve_init_vq_map(), sve_update_vq_map() and
sve_verify_vq_map() are highly non-obvious to anyone who has not dug
through cpufeatures.c in detail.

Since the way these functions interact with each other is more
important here than a full understanding of the cpufeatures code, this
patch adds comments to make the functions' roles clearer.

No functional change.

Signed-off-by: Dave Martin 


Reviewed-by: Julien Grall 

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 06/14] KVM: arm/arm64: Factor out VMID into struct kvm_vmid

2019-02-22 Thread Julien Grall

Hi Marc,

On 22/02/2019 09:18, Marc Zyngier wrote:

On Thu, 21 Feb 2019 11:02:56 +
Julien Grall  wrote:

Hi Julien,


Hi Christoffer,

On 24/01/2019 14:00, Christoffer Dall wrote:

Note that to avoid mapping the kvm_vmid_bits variable into hyp, we
simply forego the masking of the vmid value in kvm_get_vttbr and rely on
update_vmid to always assign a valid vmid value (within the supported
range).


[...]


-   kvm->arch.vmid = kvm_next_vmid;
+   vmid->vmid = kvm_next_vmid;
kvm_next_vmid++;
-   kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
-
-   /* update vttbr to be used with the new vmid */
-   pgd_phys = virt_to_phys(kvm->arch.pgd);
-   BUG_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm));
-   vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & 
VTTBR_VMID_MASK(kvm_vmid_bits);
-   kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid | cnp;
+   kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;


The arm64 version of kvm_get_vmid_bits does not look cheap. Indeed it required
to read the sanitized value of SYS_ID_AA64MMFR1_EL1 that is implemented using
the function bsearch.

So wouldn't it be better to keep kvm_vmid_bits variable for use in 
update_vttbr()?


How often does this happen? Can you measure this overhead at all?

My understanding is that we hit this path on rollover only, having IPIed
all CPUs and invalidated all TLBs. I seriously doubt you can observe
any sort of overhead at all, given that it is so incredibly rare. But
feel free to prove me wrong!


That would happen on roll-over and the first time you allocate a VMID for the VM.

I am planning to run some tests with 3-bit VMIDs and provide the results next week.
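
(For the record, that kind of test is just a hack on top of the code
quoted above -- illustrative only:)

	/* Test-only sketch: pretend we have 3 VMID bits so that roll-overs
	 * (and the associated exits/flushes) happen very frequently. */
	kvm_next_vmid &= (1 << 3) - 1;	/* instead of (1 << kvm_get_vmid_bits()) - 1 */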

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 08/26] arm64/sve: Enable SVE state tracking for non-task contexts

2019-02-22 Thread Julien Grall

Hi Dave,

On 18/02/2019 19:52, Dave Martin wrote:

The current FPSIMD/SVE context handling support for non-task (i.e.,
KVM vcpu) contexts does not take SVE into account.  This means that


NIT: Double-space before "This".


only task contexts can safely use SVE at present.

In preparation for enabling KVM guests to use SVE, it is necessary
to keep track of SVE state for non-task contexts too.

This patch adds the necessary support, removing assumptions from
the context switch code about the location of the SVE context
storage.

When binding a vcpu context, its vector length is arbitrarily
specified as SVE_VL_MIN for now.  In any case, because TIF_SVE is


NIT: Double-space before "In".


presently cleared at vcpu context bind time, the specified vector
length will not be used for anything yet.  In later patches TIF_SVE


NIT: Double-space before "In".


will be set here as appropriate, and the appropriate maximum vector
length for the vcpu will be passed when binding.


Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 11/26] KVM: arm64: Extend reset_unknown() to handle mixed RES0/UNKNOWN registers

2019-02-22 Thread Julien Grall

Hi Dave,

On 18/02/2019 19:52, Dave Martin wrote:

The reset_unknown() system register helper initialises a guest
register to a distinctive junk value on vcpu reset, to help expose
and debug deficient register initialisation within the guest.

Some registers such as the SVE control register ZCR_EL1 contain a
mixture of UNKNOWN fields and RES0 bits.  For these,
reset_unknown() does not work at present, since it sets all bits to
junk values instead of just the wanted bits.

There is no need to craft another special helper just for that,
since reset_unknown() almost does the appropriate thing anyway.
This patch takes advantage of the unused val field in struct
sys_reg_desc to specify a mask of bits that should be initialised
to zero instead of junk.

All existing users of reset_unknown() do not (and should not)
define a value for val, so they will implicitly set it to zero,
resulting in all bits being made UNKNOWN by this function: thus,
this patch makes no functional change for currently defined
registers.

Future patches will make use of non-zero val.

Signed-off-by: Dave Martin 


Reviewed-by: Julien Grall 

Cheers,


---
  arch/arm64/kvm/sys_regs.h | 11 +--
  1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 3b1bc7f..174ffc0 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -56,7 +56,12 @@ struct sys_reg_desc {
/* Index into sys_reg[], or 0 if we don't need to save it. */
int reg;
  
-	/* Value (usually reset value) */

+   /*
+* Value (usually reset value)
+* For reset_unknown, each bit set to 1 in val is treated as
+* RES0 in the register: the corresponding register bit is
+* reset to 0 instead of "unknown".
+*/
u64 val;
  
  	/* Custom get/set_user functions, fallback to generic if NULL */

@@ -92,7 +97,9 @@ static inline void reset_unknown(struct kvm_vcpu *vcpu,
  {
BUG_ON(!r->reg);
BUG_ON(r->reg >= NR_SYS_REGS);
-   __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
+
+   /* If non-zero, r->val specifies which register bits are RES0: */
+   __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL & ~r->val;
  }
  
  static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
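
(As an aside, a hypothetical example of how a mixed RES0/UNKNOWN register
could use the new field -- not an entry taken from this series:)

/* Everything above ZCR_EL1.LEN (bits [3:0]) is RES0, so only LEN gets the
 * junk pattern on reset; the access callback name here is a placeholder. */
{ SYS_DESC(SYS_ZCR_EL1), access_zcr_el1, reset_unknown, ZCR_EL1,
  .val = ~0xfUL },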




--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 02/26] arm64: fpsimd: Always set TIF_FOREIGN_FPSTATE on task state flush

2019-02-26 Thread Julien Grall




On 26/02/2019 12:06, Dave Martin wrote:

On Thu, Feb 21, 2019 at 12:39:39PM +, Julien Grall wrote:

Hi Dave,

On 18/02/2019 19:52, Dave Martin wrote:

This patch updates fpsimd_flush_task_state() to mirror the new
semantics of fpsimd_flush_cpu_state() introduced by commit
d8ad71fa38a9 ("arm64: fpsimd: Fix TIF_FOREIGN_FPSTATE after
invalidating cpu regs").  Both functions now implicitly set


NIT: Double-space before "Both"


TIF_FOREIGN_FPSTATE to indicate that the task's FPSIMD state is not
loaded into the cpu.

As a side-effect, fpsimd_flush_task_state() now sets
TIF_FOREIGN_FPSTATE even for non-running tasks.  In the case of


NIT: Double sppace before "In".


non-running tasks this is not useful but also harmless, because the
flag is live only while the corresponding task is running.  This
function is not called from fast paths, so special-casing this for
the task == current case is not really worth it.

Compiler barriers previously present in restore_sve_fpsimd_context()
are pulled into fpsimd_flush_task_state() so that it can be safely
called with preemption enabled if necessary.

Explicit calls to set TIF_FOREIGN_FPSTATE that accompany
fpsimd_flush_task_state() calls and are now redundant are removed
as appropriate.

fpsimd_flush_task_state() is used to get exclusive access to the
representation of the task's state via task_struct, for the purpose
of replacing the state.  Thus, the call to this function should


NIT: Double-space before "Thus".


happen before manipulating fpsimd_state or sve_state etc. in
task_struct.  Anomalous cases are reordered appropriately in order


NIT: Double-space before "Anomalous".


A habit rather than a mistake [1], and I don't propose to change it ;)


I wasn't aware of this. Thank you for the pointer! Please ignore the comments on 
it :).



Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 06/26] arm64/sve: Check SVE virtualisability

2019-02-26 Thread Julien Grall

Hi Dave,

On 26/02/2019 12:06, Dave Martin wrote:

On Thu, Feb 21, 2019 at 01:36:26PM +, Julien Grall wrote:

Hi Dave,

On 18/02/2019 19:52, Dave Martin wrote:

+   /*
+* Mismatches above sve_max_virtualisable_vl are fine, since
+* no guest is allowed to configure ZCR_EL2.LEN to exceed this:
+*/
+   if (sve_vl_from_vq(bit_to_vq(b)) <= sve_max_virtualisable_vl) {
+   pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n",
+   smp_processor_id());


Would it be worth to print the unsupported vector length?


Possibly not, but admittedly the intent is a bit unclear in this patch.

See my reply to Julien Thierry (and respond on that subthread if you
have comments, so that we don't end up with two subthreads discussing
the same thing...)


I will have a look at the thread.

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 08/26] arm64/sve: Enable SVE state tracking for non-task contexts

2019-02-26 Thread Julien Grall

Hi Dave,

On 26/02/2019 12:07, Dave Martin wrote:

On Fri, Feb 22, 2019 at 03:26:51PM +, Julien Grall wrote:

Hi Dave,

On 18/02/2019 19:52, Dave Martin wrote:

The current FPSIMD/SVE context handling support for non-task (i.e.,
KVM vcpu) contexts does not take SVE into account.  This means that


NIT: Double-space before "This".


See patch 2...

[...]

Does the code look reasonable to you?  This interacts with FPSIMD/SVE
context switch in the host, so it would be good to have your view on it.


I wanted to look at the rest before giving my reviewed-by tag.
FWIW, this patch looks reasonable to me.

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 08/26] arm64/sve: Enable SVE state tracking for non-task contexts

2019-02-26 Thread Julien Grall

Hi Dave,

On 26/02/2019 15:58, Dave Martin wrote:

On Tue, Feb 26, 2019 at 03:49:00PM +, Julien Grall wrote:

Hi Dave,

On 26/02/2019 12:07, Dave Martin wrote:

On Fri, Feb 22, 2019 at 03:26:51PM +, Julien Grall wrote:

Hi Dave,

On 18/02/2019 19:52, Dave Martin wrote:

The current FPSIMD/SVE context handling support for non-task (i.e.,
KVM vcpu) contexts does not take SVE into account.  This means that


NIT: Double-space before "This".


See patch 2...

[...]

Does the code look reasonable to you?  This interacts with FPSIMD/SVE
context switch in the host, so it would be good to have your view on it.


I wanted to look at the rest before giving my reviewed-by tag.
FWIW, this patch looks reasonable to me.


OK, does that amount to a Reviewed-by, or do you have other comments?


I have no further comments on this patch.

Reviewed-by: Julien Grall 

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 13/26] KVM: arm64/sve: System register context switch and access support

2019-02-26 Thread Julien Grall

Hi Dave,

On 18/02/2019 19:52, Dave Martin wrote:

@@ -1091,6 +1088,95 @@ static int reg_from_user(u64 *val, const void __user 
*uaddr, u64 id);
  static int reg_to_user(void __user *uaddr, const u64 *val, u64 id);
  static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
  
+static unsigned int sve_restrictions(const struct kvm_vcpu *vcpu,

+const struct sys_reg_desc *rd)
+{
+   return vcpu_has_sve(vcpu) ? 0 : REG_NO_USER | REG_NO_GUEST;
+}
+
+static unsigned int sve_id_restrictions(const struct kvm_vcpu *vcpu,
+   const struct sys_reg_desc *rd)
+{
+   return vcpu_has_sve(vcpu) ? 0 : REG_NO_USER;
+}
+
+static int get_zcr_el1(struct kvm_vcpu *vcpu,
+  const struct sys_reg_desc *rd,
+  const struct kvm_one_reg *reg, void __user *uaddr)
+{
+   if (WARN_ON(!vcpu_has_sve(vcpu)))
+   return -ENOENT;
+
+   return reg_to_user(uaddr, &vcpu->arch.ctxt.sys_regs[ZCR_EL1],
+  reg->id);
+}
+
+static int set_zcr_el1(struct kvm_vcpu *vcpu,
+  const struct sys_reg_desc *rd,
+  const struct kvm_one_reg *reg, void __user *uaddr)
+{
+   if (WARN_ON(!vcpu_has_sve(vcpu)))
+   return -ENOENT;
+
+   return reg_from_user(&vcpu->arch.ctxt.sys_regs[ZCR_EL1], uaddr,
+reg->id);
+}
+
+/* Generate the emulated ID_AA64ZFR0_EL1 value exposed to the guest */
+static u64 guest_id_aa64zfr0_el1(const struct kvm_vcpu *vcpu)
+{
+   if (!vcpu_has_sve(vcpu))
+   return 0;
+
+   return read_sanitised_ftr_reg(SYS_ID_AA64ZFR0_EL1);
+}
+
+static bool access_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
+  struct sys_reg_params *p,
+  const struct sys_reg_desc *rd)
+{
+   if (p->is_write)
+   return write_to_read_only(vcpu, p, rd);
+
+   p->regval = guest_id_aa64zfr0_el1(vcpu);
+   return true;
+}
+
+static int get_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
+   const struct sys_reg_desc *rd,
+   const struct kvm_one_reg *reg, void __user *uaddr)
+{
+   u64 val;
+
+   if (!vcpu_has_sve(vcpu))
+   return -ENOENT;
+
+   val = guest_id_aa64zfr0_el1(vcpu);
+   return reg_to_user(uaddr, &val, reg->id);
+}
+
+static int set_id_aa64zfr0_el1(struct kvm_vcpu *vcpu,
+   const struct sys_reg_desc *rd,
+   const struct kvm_one_reg *reg, void __user *uaddr)
+{
+   const u64 id = sys_reg_to_index(rd);
+   int err;
+   u64 val;
+
+   if (!vcpu_has_sve(vcpu))
+   return -ENOENT;
+
+   err = reg_from_user(&val, uaddr, id);
+   if (err)
+   return err;
+
+   /* This is what we mean by invariant: you can't change it. */
+   if (val != guest_id_aa64zfr0_el1(vcpu))
+   return -EINVAL;
+
+   return 0;
+}


We seem to already have code for handling invariant registers as well as
reading ID registers. I guess the only reason you can't use them is the
check that the vCPU is using SVE.


However, AFAICT the restrictions callback would prevent you from entering
the {get, set}_id accessors if the vCPU does not support SVE. So the check
should not be reachable.


Did I miss anything?

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 14/26] KVM: arm64/sve: Context switch the SVE registers

2019-02-26 Thread Julien Grall

Hi Dave,

On 26/02/2019 12:13, Dave Martin wrote:

On Wed, Feb 20, 2019 at 04:46:57PM +, Julien Thierry wrote:



On 18/02/2019 19:52, Dave Martin wrote:

In order to give each vcpu its own view of the SVE registers, this
patch adds context storage via a new sve_state pointer in struct
vcpu_arch.  An additional member sve_max_vl is also added for each
vcpu, to determine the maximum vector length visible to the guest
and thus the value to be configured in ZCR_EL2.LEN while the is


"While the  is active"?


Hmmm, yes.  Thanks for deciphering that.  Done.
I think it would be more consistent if you use "vcpu" over "guest". After all 
ZCR_EL2.LEN is per vCPU.


Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 13/26] KVM: arm64/sve: System register context switch and access support

2019-02-27 Thread Julien Grall

Hi Dave,

On 2/26/19 5:01 PM, Dave Martin wrote:

On Tue, Feb 26, 2019 at 04:32:30PM +, Julien Grall wrote:

On 18/02/2019 19:52, Dave Martin wrote:
We seem to already have code for handling invariant registers as well as
reading ID register. I guess the only reason you can't use them is because
of the check the vcpu is using SVE.

However, AFAICT the restrictions callback would prevent you to enter the
{get, set}_id if the vCPU does not support SVE. So the check should not be
reachable.


Hmmm, those checks were inherited from before this refactoring.

You're right: the checks are now done a common place, so the checks in
the actual accessors should be redundant.

I could demote them to WARN(), but it may make sense simply to delete
them.


I think removing the WARN() would be best, as it would avoid introducing
most of the wrappers for the registers.




The access_id_aa64zfr0_el1() should still be reachable, since we don't
have REG_NO_GUEST for this.


__access_id_reg takes a boolean to tell whether the register is RAZ or
not. So you could probably re-use it, passing !vcpu_has_sve(vcpu).


It feels to me that we would be introducing a new restriction to tell
whether the register should be RAZ. Anyway, the new restriction is probably
for a follow-up patch.


Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 13/26] KVM: arm64/sve: System register context switch and access support

2019-02-27 Thread Julien Grall

Hi Dave,

On 2/27/19 1:50 PM, Dave Martin wrote:

On Wed, Feb 27, 2019 at 12:02:46PM +, Julien Grall wrote:

Hi Dave,

On 2/26/19 5:01 PM, Dave Martin wrote:

On Tue, Feb 26, 2019 at 04:32:30PM +, Julien Grall wrote:

On 18/02/2019 19:52, Dave Martin wrote:
We seem to already have code for handling invariant registers as well as
reading ID register. I guess the only reason you can't use them is because
of the check the vcpu is using SVE.

However, AFAICT the restrictions callback would prevent you to enter the
{get, set}_id if the vCPU does not support SVE. So the check should not be
reachable.


Hmmm, those checks were inherited from before this refactoring.

You're right: the checks are now done a common place, so the checks in
the actual accessors should be redundant.

I could demote them to WARN(), but it may make sense simply to delete
them.


I think removing the WARN() would be best as it would avoid to introduce
most of the wrappers for the registers.



The access_id_aa64zfr0_el1() should still be reachable, since we don't
have REG_NO_GUEST for this.


__access_id_reg is taking a boolean to tell whether the register is RAZ or
not. So you probably could re-use it passing !vcpu_has_sve(vcpu).

It feels to me we would introduce a new restriction to tell whether the
register should be RAZ. Anyway, the new restriction is probably for a
follow-up patch.


It's true that we should be able to handle these as regular ID regs in
the get()/set() case, when SVE is enabled for the vcpu.  I'll have a
think about how to reduce the amount of special-case code here maybe
we can indeed get of some of these accessors entitely now that access
is rejected earlier, in a more generic way.

The access() case for this register still has to be custom though; I
don't see a trivial solution for that.


I believe you can implement access_id_aa64zfr0_el1 in one line:

return __access_id_reg(vcpu, p, r, !vcpu_has_sve(vcpu));

Another possibility is to introduce REG_GUEST_RAZ and use the 
restrictions callback to set it when the vCPU is not using SVE.
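
(To make the second option concrete -- REG_GUEST_RAZ is hypothetical, it
is the flag being proposed here, not something the series defines:)

static unsigned int sve_id_restrictions(const struct kvm_vcpu *vcpu,
					const struct sys_reg_desc *rd)
{
	/* Hide the register from userspace and make it RAZ for the guest
	 * when the vcpu has no SVE. */
	return vcpu_has_sve(vcpu) ? 0 : REG_NO_USER | REG_GUEST_RAZ;
}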


Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 05/11] KVM: arm/arm64: Reset the VCPU without preemption and vcpu state loaded

2019-03-04 Thread Julien Grall

Hi,

I noticed some issues with this patch when rebooting a guest after using perf.

[  577.513447] BUG: sleeping function called from invalid context at 
kernel/locking/mutex.c:908

[  577.521926] in_atomic(): 1, irqs_disabled(): 0, pid: 2323, name: qemu-system-aar
[  577.529354] 1 lock held by qemu-system-aar/2323:
[  577.533998]  #0: f4f96804 (&vcpu->mutex){+.+.}, at: 
kvm_vcpu_ioctl+0x74/0xac0

[  577.541865] Preemption disabled at:
[  577.541871] [] kvm_reset_vcpu+0x1c/0x1d0
[  577.550882] CPU: 6 PID: 2323 Comm: qemu-system-aar Tainted: GW  5.0.0 
#1277
[  577.559137] Hardware name: AMD Seattle (Rev.B0) Development Board (Overdrive) 
(DT)

[  577.566698] Call trace:
[  577.569138]  dump_backtrace+0x0/0x140
[  577.572793]  show_stack+0x14/0x20
[  577.576103]  dump_stack+0xa0/0xd4
[  577.579412]  ___might_sleep+0x1e4/0x2b0
[  577.583241]  __might_sleep+0x60/0xb8
[  577.586810]  __mutex_lock+0x58/0x860
[  577.590378]  mutex_lock_nested+0x1c/0x28
[  577.594294]  perf_event_ctx_lock_nested+0xf4/0x238
[  577.599078]  perf_event_read_value+0x24/0x60
[  577.603341]  kvm_pmu_get_counter_value+0x80/0xe8
[  577.607950]  kvm_pmu_stop_counter+0x2c/0x98
[  577.612126]  kvm_pmu_vcpu_reset+0x58/0xd0
[  577.616128]  kvm_reset_vcpu+0xec/0x1d0
[  577.619869]  kvm_arch_vcpu_ioctl+0x6b0/0x860
[  577.624131]  kvm_vcpu_ioctl+0xe0/0xac0
[  577.627876]  do_vfs_ioctl+0xbc/0x910
[  577.631443]  ksys_ioctl+0x78/0xa8
[  577.634751]  __arm64_sys_ioctl+0x1c/0x28
[  577.638667]  el0_svc_common+0x90/0x118
[  577.642408]  el0_svc_handler+0x2c/0x80
[  577.646150]  el0_svc+0x8/0xc

This is happening because the vCPU reset code is now running with preemption
disabled. However, the perf code cannot be called with preemption disabled,
as it uses a mutex.


Do you have any suggestion on how to fix this potential issue?

Cheers,

On 07/02/2019 13:18, Marc Zyngier wrote:

From: Christoffer Dall 

We have two ways to reset a vcpu:
- either through VCPU_INIT
- or through a PSCI_ON call

The first one is easy to reason about. The second one is implemented
in a more bizarre way, as it is the vcpu that handles PSCI_ON that
resets the vcpu that is being powered-on. As we need to turn the logic
around and have the target vcpu to reset itself, we must take some
preliminary steps.

Resetting the VCPU state modifies the system register state in memory,
but this may interact with vcpu_load/vcpu_put if running with preemption
disabled, which in turn may lead to corrupted system register state.

Address this by disabling preemption and doing put/load if required
around the reset logic.

Reviewed-by: Andrew Jones 
Signed-off-by: Christoffer Dall 
Signed-off-by: Marc Zyngier 
---
  arch/arm64/kvm/reset.c | 26 --
  1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index b72a3dd56204..f21a2a575939 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -105,16 +105,33 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, 
long ext)
   * This function finds the right table above and sets the registers on
   * the virtual CPU struct to their architecturally defined reset
   * values.
+ *
+ * Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT
+ * ioctl or as part of handling a request issued by another VCPU in the PSCI
+ * handling code.  In the first case, the VCPU will not be loaded, and in the
+ * second case the VCPU will be loaded.  Because this function operates purely
+ * on the memory-backed values of system registers, we want to do a full put if
+ * we were loaded (handling a request) and load the values back at the end of
+ * the function.  Otherwise we leave the state alone.  In both cases, we
+ * disable preemption around the vcpu reset as we would otherwise race with
+ * preempt notifiers which also call put/load.
   */
  int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
  {
const struct kvm_regs *cpu_reset;
+   int ret = -EINVAL;
+   bool loaded;
+
+   preempt_disable();
+   loaded = (vcpu->cpu != -1);
+   if (loaded)
+   kvm_arch_vcpu_put(vcpu);
  
  	switch (vcpu->arch.target) {

default:
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
if (!cpu_has_32bit_el1())
-   return -EINVAL;
+   goto out;
cpu_reset = &default_regs_reset32;
} else {
cpu_reset = &default_regs_reset;
@@ -137,7 +154,12 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
  
  	/* Reset timer */

-   return kvm_timer_vcpu_reset(vcpu);
+   ret = kvm_timer_vcpu_reset(vcpu);
+out:
+   if (loaded)
+   kvm_arch_vcpu_load(vcpu, smp_processor_id());
+   preempt_enable();
+   return ret;
  }
  
  void kvm_set_ip

Re: [PATCH 05/11] KVM: arm/arm64: Reset the VCPU without preemption and vcpu state loaded

2019-03-04 Thread Julien Grall

Hi,

On 04/03/2019 17:06, Marc Zyngier wrote:

On 04/03/2019 16:30, Julien Grall wrote:

Hi,

I noticed some issues with this patch when rebooting a guest after using perf.

[  577.513447] BUG: sleeping function called from invalid context at
kernel/locking/mutex.c:908
[  577.521926] in_atomic(): 1, irqs_disabled(): 0, pid: 2323, name: qemu-system 
aar
[  577.529354] 1 lock held by qemu-system-aar/2323:
[  577.533998]  #0: f4f96804 (&vcpu->mutex){+.+.}, at:
kvm_vcpu_ioctl+0x74/0xac0
[  577.541865] Preemption disabled at:
[  577.541871] [] kvm_reset_vcpu+0x1c/0x1d0
[  577.550882] CPU: 6 PID: 2323 Comm: qemu-system-aar Tainted: GW  5.0.0
#1277
[  577.559137] Hardware name: AMD Seattle (Rev.B0) Development Board (Overdrive)
(DT)
[  577.566698] Call trace:
[  577.569138]  dump_backtrace+0x0/0x140
[  577.572793]  show_stack+0x14/0x20
[  577.576103]  dump_stack+0xa0/0xd4
[  577.579412]  ___might_sleep+0x1e4/0x2b0
[  577.583241]  __might_sleep+0x60/0xb8
[  577.586810]  __mutex_lock+0x58/0x860
[  577.590378]  mutex_lock_nested+0x1c/0x28
[  577.594294]  perf_event_ctx_lock_nested+0xf4/0x238
[  577.599078]  perf_event_read_value+0x24/0x60
[  577.603341]  kvm_pmu_get_counter_value+0x80/0xe8
[  577.607950]  kvm_pmu_stop_counter+0x2c/0x98
[  577.612126]  kvm_pmu_vcpu_reset+0x58/0xd0
[  577.616128]  kvm_reset_vcpu+0xec/0x1d0
[  577.619869]  kvm_arch_vcpu_ioctl+0x6b0/0x860
[  577.624131]  kvm_vcpu_ioctl+0xe0/0xac0
[  577.627876]  do_vfs_ioctl+0xbc/0x910
[  577.631443]  ksys_ioctl+0x78/0xa8
[  577.634751]  __arm64_sys_ioctl+0x1c/0x28
[  577.638667]  el0_svc_common+0x90/0x118
[  577.642408]  el0_svc_handler+0x2c/0x80
[  577.646150]  el0_svc+0x8/0xc

This is happening because the vCPU reset code is now running with preemption
disable. However, the perf code cannot be called with preemption disabled as it
is using mutex.

Do you have any suggestion on the way to fix this potential issue?


Given that the PMU is entirely emulated, it never has any state loaded
on the CPU. It thus doesn't need to be part of the non-preemptible section.

Can you please give this (untested) patchlet one a go? It's not exactly
pretty, but I believe it will do the trick.


It does the trick. Are you going to submit the patch?
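
(For the archive, the shape of the fix -- a sketch only, not Marc's actual
patchlet:)

	/* In kvm_reset_vcpu(): reset the purely-emulated PMU before entering
	 * the non-preemptible section, since perf_event_*() may sleep. */
	kvm_pmu_vcpu_reset(vcpu);

	preempt_disable();
	loaded = (vcpu->cpu != -1);
	if (loaded)
		kvm_arch_vcpu_put(vcpu);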

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH RFC 00/14] kvm/arm: Align the VMID allocation with the arm64 ASID one

2019-03-21 Thread Julien Grall
This patch series moves the ASID allocator out into a separate file in order
to re-use it for the VMID. The benefits are:
- CPUs are not forced to exit on a roll-over.
- Context invalidation is now per-CPU rather than
  broadcast.

There is no performance regression on the fast path for ASID allocation.
Actually, on the hackbench measurement (300 hackbench) it was 0.7% faster.

The measurement was made on a Seattle based SoC (8 CPUs), with the
number of VMID limited to 4-bit. The test involves running concurrently 40
guests with 2 vCPUs. Each guest will then execute hackbench 5 times
before exiting.

The performance differences between the current algorithm and the new one are:
- 2.5% fewer exits from the guest
- 22.4% more flushes, although they are now local rather than
  broadcast
- 0.11% faster (just for the record)

The ASID allocator rework to make it generic has been divided in multiple
patches to make the review easier.

A branch with the patch based on 5.1-rc1 can be found:

http://xenbits.xen.org/gitweb/?p=people/julieng/linux-arm.git;a=shortlog;h=refs/heads/vmid-rework/rfc

Cheers,

Julien Grall (14):
  arm64/mm: Introduce asid_info structure and move
asid_generation/asid_map to it
  arm64/mm: Move active_asids and reserved_asids to asid_info
  arm64/mm: Move bits to asid_info
  arm64/mm: Move the variable lock and tlb_flush_pending to asid_info
  arm64/mm: Remove dependency on MM in new_context
  arm64/mm: Store the number of asid allocated per context
  arm64/mm: Introduce NUM_ASIDS
  arm64/mm: Split asid_inits in 2 parts
  arm64/mm: Split the function check_and_switch_context in 3 parts
  arm64/mm: Introduce a callback to flush the local context
  arm64: Move the ASID allocator code in a separate file
  arm64/lib: asid: Allow user to update the context under the lock
  arm/kvm: Introduce a new VMID allocator
  kvm/arm: Align the VMID allocation with the arm64 ASID one

 arch/arm/include/asm/kvm_asid.h   |  81 +++
 arch/arm/include/asm/kvm_asm.h|   2 +-
 arch/arm/include/asm/kvm_host.h   |   5 +-
 arch/arm/include/asm/kvm_hyp.h|   1 +
 arch/arm/kvm/Makefile |   1 +
 arch/arm/kvm/asid.c   | 191 +++
 arch/arm/kvm/hyp/tlb.c|   8 +-
 arch/arm64/include/asm/asid.h |  81 +++
 arch/arm64/include/asm/kvm_asid.h |   8 ++
 arch/arm64/include/asm/kvm_asm.h  |   2 +-
 arch/arm64/include/asm/kvm_host.h |   5 +-
 arch/arm64/kvm/hyp/tlb.c  |  10 +-
 arch/arm64/lib/Makefile   |   2 +
 arch/arm64/lib/asid.c | 191 +++
 arch/arm64/mm/context.c   | 205 ++
 virt/kvm/arm/arm.c| 112 +++--
 16 files changed, 638 insertions(+), 267 deletions(-)
 create mode 100644 arch/arm/include/asm/kvm_asid.h
 create mode 100644 arch/arm/kvm/asid.c
 create mode 100644 arch/arm64/include/asm/asid.h
 create mode 100644 arch/arm64/include/asm/kvm_asid.h
 create mode 100644 arch/arm64/lib/asid.c

-- 
2.11.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH RFC 01/14] arm64/mm: Introduce asid_info structure and move asid_generation/asid_map to it

2019-03-21 Thread Julien Grall
In an attempt to make the ASID allocator generic, create a new structure
asid_info to store all the information necessary for the allocator.

For now, move the variables asid_generation and asid_map to the new structure
asid_info. Follow-up patches will move more variables.

Note that to avoid more renaming afterwards, a local variable 'info' has
been created; it is a pointer to the ASID allocator structure.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 46 ++
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 1f0ea2facf24..34db54f1a39a 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -30,8 +30,11 @@
 static u32 asid_bits;
 static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 
-static atomic64_t asid_generation;
-static unsigned long *asid_map;
+struct asid_info
+{
+   atomic64_t  generation;
+   unsigned long   *map;
+} asid_info;
 
 static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
@@ -88,13 +91,13 @@ void verify_cpu_asid_bits(void)
}
 }
 
-static void flush_context(void)
+static void flush_context(struct asid_info *info)
 {
int i;
u64 asid;
 
/* Update the list of reserved ASIDs and the ASID bitmap. */
-   bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
+   bitmap_clear(info->map, 0, NUM_USER_ASIDS);
 
for_each_possible_cpu(i) {
asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
@@ -107,7 +110,7 @@ static void flush_context(void)
 */
if (asid == 0)
asid = per_cpu(reserved_asids, i);
-   __set_bit(asid2idx(asid), asid_map);
+   __set_bit(asid2idx(asid), info->map);
per_cpu(reserved_asids, i) = asid;
}
 
@@ -142,11 +145,11 @@ static bool check_update_reserved_asid(u64 asid, u64 
newasid)
return hit;
 }
 
-static u64 new_context(struct mm_struct *mm)
+static u64 new_context(struct asid_info *info, struct mm_struct *mm)
 {
static u32 cur_idx = 1;
u64 asid = atomic64_read(&mm->context.id);
-   u64 generation = atomic64_read(&asid_generation);
+   u64 generation = atomic64_read(&info->generation);
 
if (asid != 0) {
u64 newasid = generation | (asid & ~ASID_MASK);
@@ -162,7 +165,7 @@ static u64 new_context(struct mm_struct *mm)
 * We had a valid ASID in a previous life, so try to re-use
 * it if possible.
 */
-   if (!__test_and_set_bit(asid2idx(asid), asid_map))
+   if (!__test_and_set_bit(asid2idx(asid), info->map))
return newasid;
}
 
@@ -173,20 +176,20 @@ static u64 new_context(struct mm_struct *mm)
 * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
 * pairs.
 */
-   asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
+   asid = find_next_zero_bit(info->map, NUM_USER_ASIDS, cur_idx);
if (asid != NUM_USER_ASIDS)
goto set_asid;
 
/* We're out of ASIDs, so increment the global generation count */
generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION,
-&asid_generation);
-   flush_context();
+&info->generation);
+   flush_context(info);
 
/* We have more ASIDs than CPUs, so this will always succeed */
-   asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
+   asid = find_next_zero_bit(info->map, NUM_USER_ASIDS, 1);
 
 set_asid:
-   __set_bit(asid, asid_map);
+   __set_bit(asid, info->map);
cur_idx = asid;
return idx2asid(asid) | generation;
 }
@@ -195,6 +198,7 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
 {
unsigned long flags;
u64 asid, old_active_asid;
+   struct asid_info *info = &asid_info;
 
if (system_supports_cnp())
cpu_set_reserved_ttbr0();
@@ -217,7 +221,7 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
 */
old_active_asid = atomic64_read(&per_cpu(active_asids, cpu));
if (old_active_asid &&
-   !((asid ^ atomic64_read(&asid_generation)) >> asid_bits) &&
+   !((asid ^ atomic64_read(&info->generation)) >> asid_bits) &&
atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu),
 old_active_asid, asid))
goto switch_mm_fastpath;
@@ -225,8 +229,8 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
raw_spin_lock_irqsave(&cpu_asid_lock, flags);
/* Check that our ASID belongs to the cu

[PATCH RFC 06/14] arm64/mm: Store the number of asid allocated per context

2019-03-21 Thread Julien Grall
Currently the number of ASIDs allocated per context is determined at
compilation time. As the algorithm is becoming generic, the user may
want to instantiate the ASID allocator multiple times with a different
number of ASIDs allocated per context.

Add a field in asid_info to track the number of ASIDs allocated per
context. This is stored as a shift amount to avoid a division in the code.

This means the number of ASIDs allocated per context should be a power of
two.
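
As a rough illustration of why storing a shift is enough (a hedged sketch,
not part of the patch; the 16-bit width and the helper name are made up):

#include <linux/bits.h>
#include <linux/log2.h>
#include <linux/types.h>

/* Hypothetical helper: 16 ASID bits and 2 ASIDs per context (the
 * kernel/user pair used with CONFIG_UNMAP_KERNEL_AT_EL0).
 */
static unsigned long example_asid2idx(u64 asid)
{
        unsigned int ctxt_shift = ilog2(2);     /* = 1, no division needed */

        /* Same computation as asid2idx(info, asid) for info->bits == 16 */
        return (asid & GENMASK(15, 0)) >> ctxt_shift;
}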

At the same time, rename NUM_USER_ASIDS to NUM_CTXT_ASIDS to make the
name more generic.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 31 +--
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 488845c39c39..5a4c2b1aac71 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -37,6 +37,8 @@ struct asid_info
raw_spinlock_t  lock;
/* Which CPU requires context flush on next call */
cpumask_t   flush_pending;
+   /* Number of ASID allocated by context (shift value) */
+   unsigned intctxt_shift;
 } asid_info;
 
 #define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
@@ -49,15 +51,15 @@ static DEFINE_PER_CPU(u64, reserved_asids);
 #define ASID_FIRST_VERSION(info)   (1UL << ((info)->bits))
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-#define NUM_USER_ASIDS(info)   (ASID_FIRST_VERSION(info) >> 1)
-#define asid2idx(info, asid)   (((asid) & ~ASID_MASK(info)) >> 1)
-#define idx2asid(info, idx)(((idx) << 1) & ~ASID_MASK(info))
+#define ASID_PER_CONTEXT   2
 #else
-#define NUM_USER_ASIDS(info)   (ASID_FIRST_VERSION(info))
-#define asid2idx(info, asid)   ((asid) & ~ASID_MASK(info))
-#define idx2asid(info, idx)asid2idx(info, idx)
+#define ASID_PER_CONTEXT   1
 #endif
 
+#define NUM_CTXT_ASIDS(info)   (ASID_FIRST_VERSION(info) >> (info)->ctxt_shift)
+#define asid2idx(info, asid)   (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift)
+#define idx2asid(info, idx)    (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info))
+
 /* Get the ASIDBits supported by the current CPU */
 static u32 get_cpu_asid_bits(void)
 {
@@ -102,7 +104,7 @@ static void flush_context(struct asid_info *info)
u64 asid;
 
/* Update the list of reserved ASIDs and the ASID bitmap. */
-   bitmap_clear(info->map, 0, NUM_USER_ASIDS(info));
+   bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info));
 
for_each_possible_cpu(i) {
asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
@@ -182,8 +184,8 @@ static u64 new_context(struct asid_info *info, atomic64_t 
*pasid)
 * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
 * pairs.
 */
-   asid = find_next_zero_bit(info->map, NUM_USER_ASIDS(info), cur_idx);
-   if (asid != NUM_USER_ASIDS(info))
+   asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx);
+   if (asid != NUM_CTXT_ASIDS(info))
goto set_asid;
 
/* We're out of ASIDs, so increment the global generation count */
@@ -192,7 +194,7 @@ static u64 new_context(struct asid_info *info, atomic64_t 
*pasid)
flush_context(info);
 
/* We have more ASIDs than CPUs, so this will always succeed */
-   asid = find_next_zero_bit(info->map, NUM_USER_ASIDS(info), 1);
+   asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1);
 
 set_asid:
__set_bit(asid, info->map);
@@ -272,17 +274,18 @@ static int asids_init(void)
struct asid_info *info = &asid_info;
 
info->bits = get_cpu_asid_bits();
+   info->ctxt_shift = ilog2(ASID_PER_CONTEXT);
/*
 * Expect allocation after rollover to fail if we don't have at least
 * one more ASID than CPUs. ASID #0 is reserved for init_mm.
 */
-   WARN_ON(NUM_USER_ASIDS(info) - 1 <= num_possible_cpus());
+   WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
-   info->map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS(info)),
+   info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
sizeof(*info->map), GFP_KERNEL);
if (!info->map)
panic("Failed to allocate bitmap for %lu ASIDs\n",
- NUM_USER_ASIDS(info));
+ NUM_CTXT_ASIDS(info));
 
info->active = &active_asids;
info->reserved = &reserved_asids;
@@ -290,7 +293,7 @@ static int asids_init(void)
raw_spin_lock_init(&info->lock);
 
pr_info("ASID allocator initialised with %lu entries\n",
-   NUM_USER_ASIDS(info));
+  

[PATCH RFC 11/14] arm64: Move the ASID allocator code in a separate file

2019-03-21 Thread Julien Grall
We will want to re-use the ASID allocator in a separate context (e.g.
allocating VMIDs). So move the code to a new file.

The function asid_check_context has been moved to the header as a static
inline function because we want to avoid adding a branch when checking if the
ASID is still valid.
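
For readers skimming the series, a hypothetical consumer of the new header
could look roughly like the sketch below. This is illustrative only:
asid_allocator_init() and asid_check_context() come from the new asm/asid.h,
while the my_* names, the value 16 and the init function are placeholders.

#include <linux/atomic.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <asm/asid.h>
#include <asm/tlbflush.h>

static struct asid_info my_info;
static DEFINE_PER_CPU(atomic64_t, my_active);
static DEFINE_PER_CPU(u64, my_reserved);

static void my_flush_cpu_ctxt(void)
{
        local_flush_tlb_all();  /* whatever "flush this CPU" means here */
}

static int __init my_allocator_init(void)
{
        /* 16 bits, 1 ASID per context, flush callback from above */
        if (asid_allocator_init(&my_info, 16, 1, my_flush_cpu_ctxt))
                return -ENOMEM;

        my_info.active = &my_active;
        my_info.reserved = &my_reserved;
        return 0;
}

/*
 * Hot path, with preemption disabled:
 *      asid_check_context(&my_info, &ctxt_id, smp_processor_id());
 */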

Signed-off-by: Julien Grall 

---

This code will be used in the virt code for allocating VMIDs. I am not
entirely sure where to place it. Lib could potentially be a good place, but I
am not entirely convinced the algo as it is could be used by other
architectures.

Looking at x86, it seems that it will not be possible to re-use it because
the number of PCIDs (aka ASIDs) could be smaller than the number of CPUs.
See commit 10af6235e0d327d42e1bad974385197817923dc1 ("x86/mm:
Implement PCID based optimization: try to preserve old TLB entries using
PCID").
---
 arch/arm64/include/asm/asid.h |  77 ++
 arch/arm64/lib/Makefile   |   2 +
 arch/arm64/lib/asid.c | 185 +
 arch/arm64/mm/context.c   | 235 +-
 4 files changed, 267 insertions(+), 232 deletions(-)
 create mode 100644 arch/arm64/include/asm/asid.h
 create mode 100644 arch/arm64/lib/asid.c

diff --git a/arch/arm64/include/asm/asid.h b/arch/arm64/include/asm/asid.h
new file mode 100644
index ..bb62b587f37f
--- /dev/null
+++ b/arch/arm64/include/asm/asid.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ASM_ASID_H
+#define __ASM_ASM_ASID_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct asid_info
+{
+   atomic64_t  generation;
+   unsigned long   *map;
+   atomic64_t __percpu *active;
+   u64 __percpu*reserved;
+   u32 bits;
+   /* Lock protecting the structure */
+   raw_spinlock_t  lock;
+   /* Which CPU requires context flush on next call */
+   cpumask_t   flush_pending;
+   /* Number of ASID allocated by context (shift value) */
+   unsigned intctxt_shift;
+   /* Callback to locally flush the context. */
+   void(*flush_cpu_ctxt_cb)(void);
+};
+
+#define NUM_ASIDS(info)(1UL << ((info)->bits))
+#define NUM_CTXT_ASIDS(info)   (NUM_ASIDS(info) >> (info)->ctxt_shift)
+
+#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
+
+void asid_new_context(struct asid_info *info, atomic64_t *pasid,
+ unsigned int cpu);
+
+/*
+ * Check the ASID is still valid for the context. If not generate a new ASID.
+ *
+ * @pasid: Pointer to the current ASID batch
+ * @cpu: current CPU ID. Must have been acquired throught get_cpu()
+ */
+static inline void asid_check_context(struct asid_info *info,
+ atomic64_t *pasid, unsigned int cpu)
+{
+   u64 asid, old_active_asid;
+
+   asid = atomic64_read(pasid);
+
+   /*
+* The memory ordering here is subtle.
+* If our active_asid is non-zero and the ASID matches the current
+* generation, then we update the active_asid entry with a relaxed
+* cmpxchg. Racing with a concurrent rollover means that either:
+*
+* - We get a zero back from the cmpxchg and end up waiting on the
+*   lock. Taking the lock synchronises with the rollover and so
+*   we are forced to see the updated generation.
+*
+* - We get a valid ASID back from the cmpxchg, which means the
+*   relaxed xchg in flush_context will treat us as reserved
+*   because atomic RmWs are totally ordered for a given location.
+*/
+   old_active_asid = atomic64_read(&active_asid(info, cpu));
+   if (old_active_asid &&
+   !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
+   atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
+old_active_asid, asid))
+   return;
+
+   asid_new_context(info, pasid, cpu);
+}
+
+int asid_allocator_init(struct asid_info *info,
+   u32 bits, unsigned int asid_per_ctxt,
+   void (*flush_cpu_ctxt_cb)(void));
+
+#endif
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 5540a1638baf..720df5ee2aa2 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -5,6 +5,8 @@ lib-y   := clear_user.o delay.o copy_from_user.o
\
   memcmp.o strcmp.o strncmp.o strlen.o strnlen.o   \
   strchr.o strrchr.o tishift.o
 
+lib-y  += asid.o
+
 ifeq ($(CONFIG_KERNEL_MODE_NEON), y)
 obj-$(CONFIG_XOR_BLOCKS)   += xor-neon.o
 CFLAGS_REMOVE_xor-neon.o   += -mgeneral-regs-only
diff --git a/arch/arm64/lib/asid.c b/arch/arm64/lib/asid.c
new file mode 100644
index ..72b71bfb32be
--- /dev/null
++

[PATCH RFC 04/14] arm64/mm: Move the variable lock and tlb_flush_pending to asid_info

2019-03-21 Thread Julien Grall
The variables lock and tlb_flush_pending hold information for a given
ASID allocator, so move them to the asid_info structure.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index da17ed6c7117..e98ab348b9cb 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -27,8 +27,6 @@
 #include 
 #include 
 
-static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-
 struct asid_info
 {
atomic64_t  generation;
@@ -36,6 +34,9 @@ struct asid_info
atomic64_t __percpu *active;
u64 __percpu*reserved;
u32 bits;
+   raw_spinlock_t  lock;
+   /* Which CPU requires context flush on next call */
+   cpumask_t   flush_pending;
 } asid_info;
 
 #define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
@@ -44,8 +45,6 @@ struct asid_info
 static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
 
-static cpumask_t tlb_flush_pending;
-
 #define ASID_MASK(info)(~GENMASK((info)->bits - 1, 0))
 #define ASID_FIRST_VERSION(info)   (1UL << ((info)->bits))
 
@@ -124,7 +123,7 @@ static void flush_context(struct asid_info *info)
 * Queue a TLB invalidation for each CPU to perform on next
 * context-switch
 */
-   cpumask_setall(&tlb_flush_pending);
+   cpumask_setall(&info->flush_pending);
 }
 
 static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
@@ -233,7 +232,7 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
 old_active_asid, asid))
goto switch_mm_fastpath;
 
-   raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+   raw_spin_lock_irqsave(&info->lock, flags);
/* Check that our ASID belongs to the current generation. */
asid = atomic64_read(&mm->context.id);
if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
@@ -241,11 +240,11 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
atomic64_set(&mm->context.id, asid);
}
 
-   if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
+   if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
local_flush_tlb_all();
 
atomic64_set(&active_asid(info, cpu), asid);
-   raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+   raw_spin_unlock_irqrestore(&info->lock, flags);
 
 switch_mm_fastpath:
 
@@ -288,6 +287,8 @@ static int asids_init(void)
info->active = &active_asids;
info->reserved = &reserved_asids;
 
+   raw_spin_lock_init(&info->lock);
+
pr_info("ASID allocator initialised with %lu entries\n",
NUM_USER_ASIDS(info));
return 0;
-- 
2.11.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH RFC 13/14] arm/kvm: Introduce a new VMID allocator

2019-03-21 Thread Julien Grall
A follow-up patch will replace the KVM VMID allocator with the arm64 ASID
allocator. It is not yet clear how the code can be shared between arm
and arm64, so this is a verbatim copy of arch/arm64/lib/asid.c.

Signed-off-by: Julien Grall 
---
 arch/arm/include/asm/kvm_asid.h |  81 +
 arch/arm/kvm/Makefile   |   1 +
 arch/arm/kvm/asid.c | 191 
 3 files changed, 273 insertions(+)
 create mode 100644 arch/arm/include/asm/kvm_asid.h
 create mode 100644 arch/arm/kvm/asid.c

diff --git a/arch/arm/include/asm/kvm_asid.h b/arch/arm/include/asm/kvm_asid.h
new file mode 100644
index ..f312a6d7543c
--- /dev/null
+++ b/arch/arm/include/asm/kvm_asid.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARM_KVM_ASID_H__
+#define __ARM_KVM_ASID_H__
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct asid_info
+{
+   atomic64_t  generation;
+   unsigned long   *map;
+   atomic64_t __percpu *active;
+   u64 __percpu*reserved;
+   u32 bits;
+   /* Lock protecting the structure */
+   raw_spinlock_t  lock;
+   /* Which CPU requires context flush on next call */
+   cpumask_t   flush_pending;
+   /* Number of ASID allocated by context (shift value) */
+   unsigned intctxt_shift;
+   /* Callback to locally flush the context. */
+   void(*flush_cpu_ctxt_cb)(void);
+   /* Callback to call when a context is updated */
+   void(*update_ctxt_cb)(void *ctxt);
+};
+
+#define NUM_ASIDS(info)(1UL << ((info)->bits))
+#define NUM_CTXT_ASIDS(info)   (NUM_ASIDS(info) >> (info)->ctxt_shift)
+
+#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
+
+void asid_new_context(struct asid_info *info, atomic64_t *pasid,
+ unsigned int cpu, void *ctxt);
+
+/*
+ * Check the ASID is still valid for the context. If not generate a new ASID.
+ *
+ * @pasid: Pointer to the current ASID batch
+ * @cpu: current CPU ID. Must have been acquired throught get_cpu()
+ */
+static inline void asid_check_context(struct asid_info *info,
+  atomic64_t *pasid, unsigned int cpu,
+  void *ctxt)
+{
+   u64 asid, old_active_asid;
+
+   asid = atomic64_read(pasid);
+
+   /*
+* The memory ordering here is subtle.
+* If our active_asid is non-zero and the ASID matches the current
+* generation, then we update the active_asid entry with a relaxed
+* cmpxchg. Racing with a concurrent rollover means that either:
+*
+* - We get a zero back from the cmpxchg and end up waiting on the
+*   lock. Taking the lock synchronises with the rollover and so
+*   we are forced to see the updated generation.
+*
+* - We get a valid ASID back from the cmpxchg, which means the
+*   relaxed xchg in flush_context will treat us as reserved
+*   because atomic RmWs are totally ordered for a given location.
+*/
+   old_active_asid = atomic64_read(&active_asid(info, cpu));
+   if (old_active_asid &&
+   !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
+   atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
+old_active_asid, asid))
+   return;
+
+   asid_new_context(info, pasid, cpu, ctxt);
+}
+
+int asid_allocator_init(struct asid_info *info,
+   u32 bits, unsigned int asid_per_ctxt,
+   void (*flush_cpu_ctxt_cb)(void),
+   void (*update_ctxt_cb)(void *ctxt));
+
+#endif /* __ARM_KVM_ASID_H__ */
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 531e59f5be9c..35d2d4c67827 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp/
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += handle_exit.o guest.o emulate.o reset.o
+obj-y += asid.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o   vgic-v3-coproc.o
 obj-y += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
 obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
diff --git a/arch/arm/kvm/asid.c b/arch/arm/kvm/asid.c
new file mode 100644
index ..60a25270163a
--- /dev/null
+++ b/arch/arm/kvm/asid.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic ASID allocator.
+ *
+ * Based on arch/arm/mm/context.c
+ *
+ * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
+ * Copyright (C) 2012 ARM Ltd.
+ */
+
+#include 
+
+#include 
+
+#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
+
+#define ASID_MASK(info)(~GENMASK((info)->bits - 1, 0))
+#define ASID_FIRST_VERSION

[PATCH RFC 08/14] arm64/mm: Split asid_inits in 2 parts

2019-03-21 Thread Julien Grall
Move the common initialization of the ASID allocator out into a separate
function.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 43 +++
 1 file changed, 31 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index fb13bc249951..b071a1b3469e 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -271,31 +271,50 @@ asmlinkage void post_ttbr_update_workaround(void)
CONFIG_CAVIUM_ERRATUM_27456));
 }
 
-static int asids_init(void)
+/*
+ * Initialize the ASID allocator
+ *
+ * @info: Pointer to the asid allocator structure
+ * @bits: Number of ASIDs available
+ * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are
+ * allocated contiguously for a given context. This value should be a power of
+ * 2.
+ */
+static int asid_allocator_init(struct asid_info *info,
+  u32 bits, unsigned int asid_per_ctxt)
 {
-   struct asid_info *info = &asid_info;
-
-   info->bits = get_cpu_asid_bits();
-   info->ctxt_shift = ilog2(ASID_PER_CONTEXT);
+   info->bits = bits;
+   info->ctxt_shift = ilog2(asid_per_ctxt);
/*
 * Expect allocation after rollover to fail if we don't have at least
-* one more ASID than CPUs. ASID #0 is reserved for init_mm.
+* one more ASID than CPUs. ASID #0 is always reserved.
 */
WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
sizeof(*info->map), GFP_KERNEL);
if (!info->map)
-   panic("Failed to allocate bitmap for %lu ASIDs\n",
- NUM_CTXT_ASIDS(info));
-
-   info->active = &active_asids;
-   info->reserved = &reserved_asids;
+   return -ENOMEM;
 
raw_spin_lock_init(&info->lock);
 
+   return 0;
+}
+
+static int asids_init(void)
+{
+   u32 bits = get_cpu_asid_bits();
+
+   if (!asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT))
+   panic("Unable to initialize ASID allocator for %lu ASIDs\n",
+ 1UL << bits);
+
+   asid_info.active = &active_asids;
+   asid_info.reserved = &reserved_asids;
+
pr_info("ASID allocator initialised with %lu entries\n",
-   NUM_CTXT_ASIDS(info));
+   NUM_CTXT_ASIDS(&asid_info));
+
return 0;
 }
 early_initcall(asids_init);
-- 
2.11.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH RFC 14/14] kvm/arm: Align the VMID allocation with the arm64 ASID one

2019-03-21 Thread Julien Grall
At the moment, the VMID algorithm will send an SGI to all the CPUs to
force an exit and then broadcast a full TLB flush and I-Cache
invalidation.

This patch re-uses the new ASID allocator. The
benefits are:
- CPUs are not forced to exit at roll-over. Instead, the VMID will be
marked reserved and the context will be flushed at the next exit. This
will reduce the IPI traffic.
- Context invalidation is now per-CPU rather than broadcast.

With the new algorithm, the code is adapted as follows (a rough sketch of
the new flush wiring follows this list):
- The function __kvm_flush_vm_context() has been renamed to
__kvm_flush_cpu_vmid_context() and now only flushes the current CPU context.
- The call to update_vttbr() will be done with preemption disabled,
as the new algorithm requires per-CPU information to be stored.
- The TLBs associated with EL1 will be flushed when booting a CPU to
deal with stale information. This was previously done on the
allocation of the first VMID of a new generation.
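
A rough sketch of the flush wiring described above, as a hedged
illustration only: __kvm_flush_cpu_vmid_context() comes from this patch,
but vmid_info, vmid_update_ctxt and the use of kvm_get_vmid_bits() are
placeholders; the real change is in the diff below.

static void vmid_flush_cpu_ctxt(void)
{
        /* Per-CPU flush instead of a broadcast TLB + I-cache invalidation */
        kvm_call_hyp(__kvm_flush_cpu_vmid_context);
}

/*
 * Registered once at init time, roughly:
 *
 *      asid_allocator_init(&vmid_info, kvm_get_vmid_bits(), 1,
 *                          vmid_flush_cpu_ctxt, vmid_update_ctxt);
 */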

The measurement was made on a Seattle-based SoC (8 CPUs), with the
number of VMIDs limited to 4 bits. The test involves running 40 guests
with 2 vCPUs concurrently. Each guest executes hackbench 5 times
before exiting.

The performance differences between the current algorithm and the new one are:
- 2.5% fewer exits from the guest
- 22.4% more flushes, although they are now local rather than broadcast
- 0.11% faster (just for the record)

Signed-off-by: Julien Grall 


Looking at __kvm_flush_vm_context(), it might be possible to
reduce the overhead further by removing the I-Cache flush for
caches other than VIPT. This has been left aside for now.
---
 arch/arm/include/asm/kvm_asm.h|   2 +-
 arch/arm/include/asm/kvm_host.h   |   5 +-
 arch/arm/include/asm/kvm_hyp.h|   1 +
 arch/arm/kvm/hyp/tlb.c|   8 +--
 arch/arm64/include/asm/kvm_asid.h |   8 +++
 arch/arm64/include/asm/kvm_asm.h  |   2 +-
 arch/arm64/include/asm/kvm_host.h |   5 +-
 arch/arm64/kvm/hyp/tlb.c  |  10 ++--
 virt/kvm/arm/arm.c| 112 +-
 9 files changed, 61 insertions(+), 92 deletions(-)
 create mode 100644 arch/arm64/include/asm/kvm_asid.h

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 35491af87985..ce60a4a46fcc 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -65,7 +65,7 @@ struct kvm_vcpu;
 extern char __kvm_hyp_init[];
 extern char __kvm_hyp_init_end[];
 
-extern void __kvm_flush_vm_context(void);
+extern void __kvm_flush_cpu_vmid_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
 extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 770d73257ad9..e2c3a4a7b020 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -59,8 +59,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
 
 struct kvm_vmid {
-   /* The VMID generation used for the virt. memory system */
-   u64vmid_gen;
+   /* The ASID used for the ASID allocator */
+   atomic64_t asid;
u32vmid;
 };
 
@@ -264,7 +264,6 @@ unsigned long __kvm_call_hyp(void *hypfn, ...);
ret;\
})
 
-void force_vm_exit(const cpumask_t *mask);
 int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
  struct kvm_vcpu_events *events);
 
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index 87bcd18df8d5..c3d1011ca1bf 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -75,6 +75,7 @@
 #define TLBIALLIS  __ACCESS_CP15(c8, 0, c3, 0)
 #define TLBIALL__ACCESS_CP15(c8, 0, c7, 0)
 #define TLBIALLNSNHIS  __ACCESS_CP15(c8, 4, c3, 4)
+#define TLBIALLNSNH__ACCESS_CP15(c8, 4, c7, 4)
 #define PRRR   __ACCESS_CP15(c10, 0, c2, 0)
 #define NMRR   __ACCESS_CP15(c10, 0, c2, 1)
 #define AMAIR0 __ACCESS_CP15(c10, 0, c3, 0)
diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c
index 8e4afba73635..42b9ab47fc94 100644
--- a/arch/arm/kvm/hyp/tlb.c
+++ b/arch/arm/kvm/hyp/tlb.c
@@ -71,9 +71,9 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu 
*vcpu)
write_sysreg(0, VTTBR);
 }
 
-void __hyp_text __kvm_flush_vm_context(void)
+void __hyp_text __kvm_flush_cpu_vmid_context(void)
 {
-   write_sysreg(0, TLBIALLNSNHIS);
-   write_sysreg(0, ICIALLUIS);
-   dsb(ish);
+   write_sysreg(0, TLBIALLNSNH);
+   write_sysreg(0, ICIALLU);
+   dsb(nsh);
 }
diff --git a/arch/arm64/include/asm/kvm_asid.h 
b/arch/arm64/include/asm/kvm_asid.h
new file mode 100644
index ..8b586e43c094
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_asid.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL

[PATCH RFC 02/14] arm64/mm: Move active_asids and reserved_asids to asid_info

2019-03-21 Thread Julien Grall
The variables active_asids and reserved_asids hold information for a
given ASID allocator. So move them to the structure asid_info.

At the same time, introduce wrappers to access the active and reserved
ASIDs to make the code clearer.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 34 ++
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 34db54f1a39a..cfe4c5f7abf3 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -34,10 +34,16 @@ struct asid_info
 {
atomic64_t  generation;
unsigned long   *map;
+   atomic64_t __percpu *active;
+   u64 __percpu*reserved;
 } asid_info;
 
+#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
+#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
+
 static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
+
 static cpumask_t tlb_flush_pending;
 
 #define ASID_MASK  (~GENMASK(asid_bits - 1, 0))
@@ -100,7 +106,7 @@ static void flush_context(struct asid_info *info)
bitmap_clear(info->map, 0, NUM_USER_ASIDS);
 
for_each_possible_cpu(i) {
-   asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
+   asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
/*
 * If this CPU has already been through a
 * rollover, but hasn't run another task in
@@ -109,9 +115,9 @@ static void flush_context(struct asid_info *info)
 * the process it is still running.
 */
if (asid == 0)
-   asid = per_cpu(reserved_asids, i);
+   asid = reserved_asid(info, i);
__set_bit(asid2idx(asid), info->map);
-   per_cpu(reserved_asids, i) = asid;
+   reserved_asid(info, i) = asid;
}
 
/*
@@ -121,7 +127,8 @@ static void flush_context(struct asid_info *info)
cpumask_setall(&tlb_flush_pending);
 }
 
-static bool check_update_reserved_asid(u64 asid, u64 newasid)
+static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
+  u64 newasid)
 {
int cpu;
bool hit = false;
@@ -136,9 +143,9 @@ static bool check_update_reserved_asid(u64 asid, u64 
newasid)
 * generation.
 */
for_each_possible_cpu(cpu) {
-   if (per_cpu(reserved_asids, cpu) == asid) {
+   if (reserved_asid(info, cpu) == asid) {
hit = true;
-   per_cpu(reserved_asids, cpu) = newasid;
+   reserved_asid(info, cpu) = newasid;
}
}
 
@@ -158,7 +165,7 @@ static u64 new_context(struct asid_info *info, struct 
mm_struct *mm)
 * If our current ASID was active during a rollover, we
 * can continue to use it and this was just a false alarm.
 */
-   if (check_update_reserved_asid(asid, newasid))
+   if (check_update_reserved_asid(info, asid, newasid))
return newasid;
 
/*
@@ -207,8 +214,8 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
 
/*
 * The memory ordering here is subtle.
-* If our active_asids is non-zero and the ASID matches the current
-* generation, then we update the active_asids entry with a relaxed
+* If our active_asid is non-zero and the ASID matches the current
+* generation, then we update the active_asid entry with a relaxed
 * cmpxchg. Racing with a concurrent rollover means that either:
 *
 * - We get a zero back from the cmpxchg and end up waiting on the
@@ -219,10 +226,10 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
 *   relaxed xchg in flush_context will treat us as reserved
 *   because atomic RmWs are totally ordered for a given location.
 */
-   old_active_asid = atomic64_read(&per_cpu(active_asids, cpu));
+   old_active_asid = atomic64_read(&active_asid(info, cpu));
if (old_active_asid &&
!((asid ^ atomic64_read(&info->generation)) >> asid_bits) &&
-   atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu),
+   atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
 old_active_asid, asid))
goto switch_mm_fastpath;
 
@@ -237,7 +244,7 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
local_flush_tlb_all();
 
-   atomic64_set(&per_cpu(active_asids, cpu), asid);
+   atomic64_set(&active_asid(info, cpu), asid);
 

[PATCH RFC 12/14] arm64/lib: asid: Allow user to update the context under the lock

2019-03-21 Thread Julien Grall
Some users of the ASID allocator (e.g. VMID) will need to update the
context when a new ASID is generated. This has to be protected by a lock
to prevent concurrent modification.

Rather than introducing yet another lock, it is possible to re-use the
allocator lock for that purpose. This patch introduces a new callback
that will be called when updating the context.
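
As a hedged illustration of the kind of user this callback targets (the
concrete user only appears later in the series; every name below is a
placeholder, not code from this patch):

#include <linux/atomic.h>
#include <linux/bits.h>
#include <linux/types.h>

/* Hypothetical context whose hardware ID is derived from the ASID. */
struct my_context {
        atomic64_t id;          /* ASID batch managed by the allocator */
        u64 hw_id;              /* derived state consumed by hardware */
};

/* Invoked by asid_new_context() with info->lock held, so the derived
 * state can never be observed half-updated.
 */
static void my_update_ctxt(void *ctxt)
{
        struct my_context *c = ctxt;

        c->hw_id = atomic64_read(&c->id) & GENMASK(15, 0);
}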

Signed-off-by: Julien Grall 
---
 arch/arm64/include/asm/asid.h | 12 
 arch/arm64/lib/asid.c | 10 --
 arch/arm64/mm/context.c   | 11 ---
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/asid.h b/arch/arm64/include/asm/asid.h
index bb62b587f37f..d8d9dc875bec 100644
--- a/arch/arm64/include/asm/asid.h
+++ b/arch/arm64/include/asm/asid.h
@@ -23,6 +23,8 @@ struct asid_info
unsigned intctxt_shift;
/* Callback to locally flush the context. */
void(*flush_cpu_ctxt_cb)(void);
+   /* Callback to call when a context is updated */
+   void(*update_ctxt_cb)(void *ctxt);
 };
 
 #define NUM_ASIDS(info)(1UL << ((info)->bits))
@@ -31,7 +33,7 @@ struct asid_info
 #define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
 
 void asid_new_context(struct asid_info *info, atomic64_t *pasid,
- unsigned int cpu);
+ unsigned int cpu, void *ctxt);
 
 /*
  * Check the ASID is still valid for the context. If not generate a new ASID.
@@ -40,7 +42,8 @@ void asid_new_context(struct asid_info *info, atomic64_t 
*pasid,
  * @cpu: current CPU ID. Must have been acquired throught get_cpu()
  */
 static inline void asid_check_context(struct asid_info *info,
- atomic64_t *pasid, unsigned int cpu)
+  atomic64_t *pasid, unsigned int cpu,
+  void *ctxt)
 {
u64 asid, old_active_asid;
 
@@ -67,11 +70,12 @@ static inline void asid_check_context(struct asid_info 
*info,
 old_active_asid, asid))
return;
 
-   asid_new_context(info, pasid, cpu);
+   asid_new_context(info, pasid, cpu, ctxt);
 }
 
 int asid_allocator_init(struct asid_info *info,
u32 bits, unsigned int asid_per_ctxt,
-   void (*flush_cpu_ctxt_cb)(void));
+   void (*flush_cpu_ctxt_cb)(void),
+   void (*update_ctxt_cb)(void *ctxt));
 
 #endif
diff --git a/arch/arm64/lib/asid.c b/arch/arm64/lib/asid.c
index 72b71bfb32be..b47e6769c1bc 100644
--- a/arch/arm64/lib/asid.c
+++ b/arch/arm64/lib/asid.c
@@ -130,9 +130,10 @@ static u64 new_context(struct asid_info *info, atomic64_t 
*pasid)
  * @pasid: Pointer to the current ASID batch allocated. It will be updated
  * with the new ASID batch.
  * @cpu: current CPU ID. Must have been acquired through get_cpu()
+ * @ctxt: Context to update when calling update_context
  */
 void asid_new_context(struct asid_info *info, atomic64_t *pasid,
- unsigned int cpu)
+ unsigned int cpu, void *ctxt)
 {
unsigned long flags;
u64 asid;
@@ -149,6 +150,9 @@ void asid_new_context(struct asid_info *info, atomic64_t 
*pasid,
info->flush_cpu_ctxt_cb();
 
atomic64_set(&active_asid(info, cpu), asid);
+
+   info->update_ctxt_cb(ctxt);
+
raw_spin_unlock_irqrestore(&info->lock, flags);
 }
 
@@ -163,11 +167,13 @@ void asid_new_context(struct asid_info *info, atomic64_t 
*pasid,
  */
 int asid_allocator_init(struct asid_info *info,
u32 bits, unsigned int asid_per_ctxt,
-   void (*flush_cpu_ctxt_cb)(void))
+   void (*flush_cpu_ctxt_cb)(void),
+   void (*update_ctxt_cb)(void *ctxt))
 {
info->bits = bits;
info->ctxt_shift = ilog2(asid_per_ctxt);
info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
+   info->update_ctxt_cb = update_ctxt_cb;
/*
 * Expect allocation after rollover to fail if we don't have at least
 * one more ASID than CPUs. ASID #0 is always reserved.
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 95ee7711a2ef..737b4bd7bbe7 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -82,7 +82,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned 
int cpu)
if (system_supports_cnp())
cpu_set_reserved_ttbr0();
 
-   asid_check_context(&asid_info, &mm->context.id, cpu);
+   asid_check_context(&asid_info, &mm->context.id, cpu, mm);
 
arm64_apply_bp_hardening();
 
@@ -108,12 +108,17 @@ static void asid_flush_cpu_ctxt(void)
local_flush_tlb_all();
 }
 
+static void asid_update_ctxt(void *ctxt)
+{
+   /* Nothing to do */
+}
+
 static int asid

[PATCH RFC 09/14] arm64/mm: Split the function check_and_switch_context in 3 parts

2019-03-21 Thread Julien Grall
The function check_and_switch_context is used to:
1) Check whether the ASID is still valid
2) Generate a new one if it is not valid
3) Switch the context

While the latter is specific to the MM subsystem, the rest could be part
of the generic ASID allocator.

After this patch, the function is split into 3 parts, which correspond
to the following functions:
1) asid_check_context: Check if the ASID is still valid
2) asid_new_context: Generate a new ASID for the context
3) check_and_switch_context: Call 1) and 2) and switch the context

1) and 2) have not been merged into a single function because we want to
avoid adding a branch when the ASID is still valid. This will matter
when the code is moved to a separate file later on, as 1) will reside
in the header as a static inline function.
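
Schematically, the resulting call structure is the following (an informal
outline mirroring the diff below, not literal code):

/*
 * check_and_switch_context()          3) switches the context
 *   -> asid_check_context()           1) lock-free check of the ASID
 *        -> asid_new_context()        2) slow path: new ASID under the lock
 */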

Signed-off-by: Julien Grall 

---

Will wants to avoid adding a branch when the ASID is still valid, so
1) and 2) are in separate functions. The former will move to a new
header and become a static inline function.
---
 arch/arm64/mm/context.c | 51 +
 1 file changed, 39 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index b071a1b3469e..cbf1c24cb3ee 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -204,16 +204,21 @@ static u64 new_context(struct asid_info *info, atomic64_t 
*pasid)
return idx2asid(info, asid) | generation;
 }
 
-void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
+static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
+unsigned int cpu);
+
+/*
+ * Check the ASID is still valid for the context. If not generate a new ASID.
+ *
+ * @pasid: Pointer to the current ASID batch
+ * @cpu: current CPU ID. Must have been acquired throught get_cpu()
+ */
+static void asid_check_context(struct asid_info *info,
+  atomic64_t *pasid, unsigned int cpu)
 {
-   unsigned long flags;
u64 asid, old_active_asid;
-   struct asid_info *info = &asid_info;
 
-   if (system_supports_cnp())
-   cpu_set_reserved_ttbr0();
-
-   asid = atomic64_read(&mm->context.id);
+   asid = atomic64_read(pasid);
 
/*
 * The memory ordering here is subtle.
@@ -234,14 +239,30 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
!((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
 old_active_asid, asid))
-   goto switch_mm_fastpath;
+   return;
+
+   asid_new_context(info, pasid, cpu);
+}
+
+/*
+ * Generate a new ASID for the context.
+ *
+ * @pasid: Pointer to the current ASID batch allocated. It will be updated
+ * with the new ASID batch.
+ * @cpu: current CPU ID. Must have been acquired through get_cpu()
+ */
+static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
+unsigned int cpu)
+{
+   unsigned long flags;
+   u64 asid;
 
raw_spin_lock_irqsave(&info->lock, flags);
/* Check that our ASID belongs to the current generation. */
-   asid = atomic64_read(&mm->context.id);
+   asid = atomic64_read(pasid);
if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
-   asid = new_context(info, &mm->context.id);
-   atomic64_set(&mm->context.id, asid);
+   asid = new_context(info, pasid);
+   atomic64_set(pasid, asid);
}
 
if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
@@ -249,8 +270,14 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
 
atomic64_set(&active_asid(info, cpu), asid);
raw_spin_unlock_irqrestore(&info->lock, flags);
+}
+
+void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
+{
+   if (system_supports_cnp())
+   cpu_set_reserved_ttbr0();
 
-switch_mm_fastpath:
+   asid_check_context(&asid_info, &mm->context.id, cpu);
 
arm64_apply_bp_hardening();
 
-- 
2.11.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH RFC 05/14] arm64/mm: Remove dependency on MM in new_context

2019-03-21 Thread Julien Grall
The function new_context will be part of a generic ASID allocator. At
the moment, the MM structure is only used to fetch the ASID.

To remove the dependency on MM, it is possible to just pass a pointer to
the current ASID.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index e98ab348b9cb..488845c39c39 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -151,10 +151,10 @@ static bool check_update_reserved_asid(struct asid_info 
*info, u64 asid,
return hit;
 }
 
-static u64 new_context(struct asid_info *info, struct mm_struct *mm)
+static u64 new_context(struct asid_info *info, atomic64_t *pasid)
 {
static u32 cur_idx = 1;
-   u64 asid = atomic64_read(&mm->context.id);
+   u64 asid = atomic64_read(pasid);
u64 generation = atomic64_read(&info->generation);
 
if (asid != 0) {
@@ -236,7 +236,7 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
/* Check that our ASID belongs to the current generation. */
asid = atomic64_read(&mm->context.id);
if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
-   asid = new_context(info, mm);
+   asid = new_context(info, &mm->context.id);
atomic64_set(&mm->context.id, asid);
}
 
-- 
2.11.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH RFC 03/14] arm64/mm: Move bits to asid_info

2019-03-21 Thread Julien Grall
The variable bits holds information for a given ASID allocator, so move
it to the asid_info structure.

Because most of the macros were relying on bits, they now take an
extra parameter that is a pointer to the asid_info structure.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 59 +
 1 file changed, 30 insertions(+), 29 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index cfe4c5f7abf3..da17ed6c7117 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -27,7 +27,6 @@
 #include 
 #include 
 
-static u32 asid_bits;
 static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 
 struct asid_info
@@ -36,6 +35,7 @@ struct asid_info
unsigned long   *map;
atomic64_t __percpu *active;
u64 __percpu*reserved;
+   u32 bits;
 } asid_info;
 
 #define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
@@ -46,17 +46,17 @@ static DEFINE_PER_CPU(u64, reserved_asids);
 
 static cpumask_t tlb_flush_pending;
 
-#define ASID_MASK  (~GENMASK(asid_bits - 1, 0))
-#define ASID_FIRST_VERSION (1UL << asid_bits)
+#define ASID_MASK(info)(~GENMASK((info)->bits - 1, 0))
+#define ASID_FIRST_VERSION(info)   (1UL << ((info)->bits))
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-#define NUM_USER_ASIDS (ASID_FIRST_VERSION >> 1)
-#define asid2idx(asid) (((asid) & ~ASID_MASK) >> 1)
-#define idx2asid(idx)  (((idx) << 1) & ~ASID_MASK)
+#define NUM_USER_ASIDS(info)   (ASID_FIRST_VERSION(info) >> 1)
+#define asid2idx(info, asid)   (((asid) & ~ASID_MASK(info)) >> 1)
+#define idx2asid(info, idx)(((idx) << 1) & ~ASID_MASK(info))
 #else
-#define NUM_USER_ASIDS (ASID_FIRST_VERSION)
-#define asid2idx(asid) ((asid) & ~ASID_MASK)
-#define idx2asid(idx)  asid2idx(idx)
+#define NUM_USER_ASIDS(info)   (ASID_FIRST_VERSION(info))
+#define asid2idx(info, asid)   ((asid) & ~ASID_MASK(info))
+#define idx2asid(info, idx)asid2idx(info, idx)
 #endif
 
 /* Get the ASIDBits supported by the current CPU */
@@ -86,13 +86,13 @@ void verify_cpu_asid_bits(void)
 {
u32 asid = get_cpu_asid_bits();
 
-   if (asid < asid_bits) {
+   if (asid < asid_info.bits) {
/*
 * We cannot decrease the ASID size at runtime, so panic if we 
support
 * fewer ASID bits than the boot CPU.
 */
pr_crit("CPU%d: smaller ASID size(%u) than boot CPU (%u)\n",
-   smp_processor_id(), asid, asid_bits);
+   smp_processor_id(), asid, asid_info.bits);
cpu_panic_kernel();
}
 }
@@ -103,7 +103,7 @@ static void flush_context(struct asid_info *info)
u64 asid;
 
/* Update the list of reserved ASIDs and the ASID bitmap. */
-   bitmap_clear(info->map, 0, NUM_USER_ASIDS);
+   bitmap_clear(info->map, 0, NUM_USER_ASIDS(info));
 
for_each_possible_cpu(i) {
asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
@@ -116,7 +116,7 @@ static void flush_context(struct asid_info *info)
 */
if (asid == 0)
asid = reserved_asid(info, i);
-   __set_bit(asid2idx(asid), info->map);
+   __set_bit(asid2idx(info, asid), info->map);
reserved_asid(info, i) = asid;
}
 
@@ -159,7 +159,7 @@ static u64 new_context(struct asid_info *info, struct 
mm_struct *mm)
u64 generation = atomic64_read(&info->generation);
 
if (asid != 0) {
-   u64 newasid = generation | (asid & ~ASID_MASK);
+   u64 newasid = generation | (asid & ~ASID_MASK(info));
 
/*
 * If our current ASID was active during a rollover, we
@@ -172,7 +172,7 @@ static u64 new_context(struct asid_info *info, struct 
mm_struct *mm)
 * We had a valid ASID in a previous life, so try to re-use
 * it if possible.
 */
-   if (!__test_and_set_bit(asid2idx(asid), info->map))
+   if (!__test_and_set_bit(asid2idx(info, asid), info->map))
return newasid;
}
 
@@ -183,22 +183,22 @@ static u64 new_context(struct asid_info *info, struct 
mm_struct *mm)
 * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
 * pairs.
 */
-   asid = find_next_zero_bit(info->map, NUM_USER_ASIDS, cur_idx);
-   if (asid != NUM_USER_ASIDS)
+   asid = find_next_zero_bit(info->map, NUM_USER_ASIDS(info), cur_idx);
+   if (asid != NUM_USER_ASIDS(info))
goto set_asid;
 
/* We're out of ASIDs, so increment the

[PATCH RFC 07/14] arm64/mm: Introduce NUM_ASIDS

2019-03-21 Thread Julien Grall
At the moment, ASID_FIRST_VERSION is used to know the number of ASIDs
supported. As we are going to move the ASID allocator to a separate file, it
would be better to use a different name for external users.

This patch adds NUM_ASIDS and implements ASID_FIRST_VERSION using it.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 5a4c2b1aac71..fb13bc249951 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -48,7 +48,9 @@ static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
 
 #define ASID_MASK(info)(~GENMASK((info)->bits - 1, 0))
-#define ASID_FIRST_VERSION(info)   (1UL << ((info)->bits))
+#define NUM_ASIDS(info)(1UL << ((info)->bits))
+
+#define ASID_FIRST_VERSION(info)   NUM_ASIDS(info)
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 #define ASID_PER_CONTEXT   2
@@ -56,7 +58,7 @@ static DEFINE_PER_CPU(u64, reserved_asids);
 #define ASID_PER_CONTEXT   1
 #endif
 
-#define NUM_CTXT_ASIDS(info)   (ASID_FIRST_VERSION(info) >> (info)->ctxt_shift)
+#define NUM_CTXT_ASIDS(info)   (NUM_ASIDS(info) >> (info)->ctxt_shift)
 #define asid2idx(info, asid)   (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift)
 #define idx2asid(info, idx)    (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info))
 
-- 
2.11.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH RFC 10/14] arm64/mm: Introduce a callback to flush the local context

2019-03-21 Thread Julien Grall
Flushing the local context will vary depending on the actual user of the ASID
allocator. Introduce a new callback to flush the local context and move
the call to flush the local TLB into it.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index cbf1c24cb3ee..678a57b77c91 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -39,6 +39,8 @@ struct asid_info
cpumask_t   flush_pending;
/* Number of ASID allocated by context (shift value) */
unsigned intctxt_shift;
+   /* Callback to locally flush the context. */
+   void(*flush_cpu_ctxt_cb)(void);
 } asid_info;
 
 #define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
@@ -266,7 +268,7 @@ static void asid_new_context(struct asid_info *info, 
atomic64_t *pasid,
}
 
if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
-   local_flush_tlb_all();
+   info->flush_cpu_ctxt_cb();
 
atomic64_set(&active_asid(info, cpu), asid);
raw_spin_unlock_irqrestore(&info->lock, flags);
@@ -298,6 +300,11 @@ asmlinkage void post_ttbr_update_workaround(void)
CONFIG_CAVIUM_ERRATUM_27456));
 }
 
+static void asid_flush_cpu_ctxt(void)
+{
+   local_flush_tlb_all();
+}
+
 /*
  * Initialize the ASID allocator
  *
@@ -308,10 +315,12 @@ asmlinkage void post_ttbr_update_workaround(void)
  * 2.
  */
 static int asid_allocator_init(struct asid_info *info,
-  u32 bits, unsigned int asid_per_ctxt)
+  u32 bits, unsigned int asid_per_ctxt,
+  void (*flush_cpu_ctxt_cb)(void))
 {
info->bits = bits;
info->ctxt_shift = ilog2(asid_per_ctxt);
+   info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
/*
 * Expect allocation after rollover to fail if we don't have at least
 * one more ASID than CPUs. ASID #0 is always reserved.
@@ -332,7 +341,8 @@ static int asids_init(void)
 {
u32 bits = get_cpu_asid_bits();
 
-   if (!asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT))
+   if (!asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT,
+asid_flush_cpu_ctxt))
panic("Unable to initialize ASID allocator for %lu ASIDs\n",
  1UL << bits);
 
-- 
2.11.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH RFC 01/14] arm64/mm: Introduce asid_info structure and move asid_generation/asid_map to it

2019-03-21 Thread Julien Grall

On 3/21/19 5:03 PM, Suzuki K Poulose wrote:

Hi Julien,


Hi Suzuki,


On 21/03/2019 16:36, Julien Grall wrote:

In an attempt to make the ASID allocator generic, create a new structure
asid_info to store all the information necessary for the allocator.

For now, move the variables asid_generation and asid_map to the new 
structure

asid_info. Follow-up patches will move more variables.

Note to avoid more renaming aftwards, a local variable 'info' has been
created and is a pointer to the ASID allocator structure.

Signed-off-by: Julien Grall 
---
  arch/arm64/mm/context.c | 46 
++

  1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 1f0ea2facf24..34db54f1a39a 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -30,8 +30,11 @@
  static u32 asid_bits;
  static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-static atomic64_t asid_generation;
-static unsigned long *asid_map;
+struct asid_info
+{
+    atomic64_t    generation;
+    unsigned long    *map;
+} asid_info;


Shouldn't this be static ? Rest looks fine.


Yes it should be static. I have updated my code.

Thank you for the review!

Cheers,



Cheers
Suzuki


--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v5 1/3] arm64/fpsimd: Remove the prototype for sve_flush_cpu_state()

2019-05-21 Thread Julien Grall
The function sve_flush_cpu_state() has been removed in commit 21cdd7fd76e3
("KVM: arm64: Remove eager host SVE state saving").

So remove the associated prototype in asm/fpsimd.h.

Signed-off-by: Julien Grall 
Reviewed-by: Dave Martin 

---
Changes in v3:
- Add Dave's reviewed-by
- Fix checkpatch style error when mentioning a commit

Changes in v2:
- Patch added
---
 arch/arm64/include/asm/fpsimd.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index df62bbd33a9a..b73d12fcc7f9 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -64,7 +64,6 @@ extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state 
*state,
 
 extern void fpsimd_flush_task_state(struct task_struct *target);
 extern void fpsimd_flush_cpu_state(void);
-extern void sve_flush_cpu_state(void);
 
 /* Maximum VL that SVE VL-agnostic software can transparently support */
 #define SVE_VL_ARCH_MAX 0x100
-- 
2.11.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v5 0/3] arm64/fpsimd: Don't disable softirq when touching FPSIMD/SVE state

2019-05-21 Thread Julien Grall
Hi all,

This patch series keeps softirqs enabled while touching FPSIMD/SVE state.
For more details on the impact see patch #3.

This patch series has been benchmarked on Linux 5.1-rc4 with defconfig.

On Juno2:
* hackbench 100 process 1000 (10 times)
* 0.7% quicker

On ThunderX 2:
* hackbench 1000 process 1000 (20 times)
* 3.4% quicker

Note that while the benchmark has been done on 5.1-rc4, the patch series is
based on 5.2-rc1.

Cheers,

Julien Grall (3):
  arm64/fpsimd: Remove the prototype for sve_flush_cpu_state()
  arch/arm64: fpsimd: Introduce fpsimd_save_and_flush_cpu_state() and
use it
  arm64/fpsimd: Don't disable softirq when touching FPSIMD/SVE state

 arch/arm64/include/asm/fpsimd.h |   5 +-
 arch/arm64/include/asm/simd.h   |  10 +--
 arch/arm64/kernel/fpsimd.c  | 139 +++-
 arch/arm64/kvm/fpsimd.c |   4 +-
 4 files changed, 103 insertions(+), 55 deletions(-)

-- 
2.11.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v5 2/3] arch/arm64: fpsimd: Introduce fpsimd_save_and_flush_cpu_state() and use it

2019-05-21 Thread Julien Grall
The only external user of fpsimd_save() and fpsimd_flush_cpu_state() is
the KVM FPSIMD code.

A follow-up patch will introduce a mechanism to acquire ownership of the
FPSIMD/SVE context for performing context management operations. Rather
than having to export the new helpers to get/put the context, we can just
introduce a new function to combine fpsimd_save() and
fpsimd_flush_cpu_state().

This also has the advantage of removing any external calls to fpsimd_save()
and fpsimd_flush_cpu_state(), so they can be made static.

Lastly, the new function can also be used in the PM notifier.

Signed-off-by: Julien Grall 
Reviewed-by: Dave Martin 

---
kernel_neon_begin() does not use fpsimd_save_and_flush_cpu_state()
because the next patch will modify the function to also grab the
FPSIMD/SVE context.

Changes in v4:
- Remove newline before the new prototype
- Add Dave's reviewed-by

Changes in v3:
- Rework the commit message
- Move the prototype of fpsimd_save_and_flush_cpu_state()
further down in the header
- Remove comment in kvm_arch_vcpu_put_fp()

Changes in v2:
- Patch added
---
 arch/arm64/include/asm/fpsimd.h |  4 +---
 arch/arm64/kernel/fpsimd.c  | 17 +
 arch/arm64/kvm/fpsimd.c |  4 +---
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index b73d12fcc7f9..4154851c21ab 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -48,8 +48,6 @@ struct task_struct;
 extern void fpsimd_save_state(struct user_fpsimd_state *state);
 extern void fpsimd_load_state(struct user_fpsimd_state *state);
 
-extern void fpsimd_save(void);
-
 extern void fpsimd_thread_switch(struct task_struct *next);
 extern void fpsimd_flush_thread(void);
 
@@ -63,7 +61,7 @@ extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state 
*state,
 void *sve_state, unsigned int sve_vl);
 
 extern void fpsimd_flush_task_state(struct task_struct *target);
-extern void fpsimd_flush_cpu_state(void);
+extern void fpsimd_save_and_flush_cpu_state(void);
 
 /* Maximum VL that SVE VL-agnostic software can transparently support */
 #define SVE_VL_ARCH_MAX 0x100
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index a38bf74bcca8..6448921a2f59 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -246,7 +246,7 @@ static void task_fpsimd_load(void)
  *
  * Softirqs (and preemption) must be disabled.
  */
-void fpsimd_save(void)
+static void fpsimd_save(void)
 {
struct fpsimd_last_state_struct const *last =
this_cpu_ptr(&fpsimd_last_state);
@@ -1122,12 +1122,22 @@ void fpsimd_flush_task_state(struct task_struct *t)
  * Invalidate any task's FPSIMD state that is present on this cpu.
  * This function must be called with softirqs disabled.
  */
-void fpsimd_flush_cpu_state(void)
+static void fpsimd_flush_cpu_state(void)
 {
__this_cpu_write(fpsimd_last_state.st, NULL);
set_thread_flag(TIF_FOREIGN_FPSTATE);
 }
 
+/*
+ * Save the FPSIMD state to memory and invalidate cpu view.
+ * This function must be called with softirqs (and preemption) disabled.
+ */
+void fpsimd_save_and_flush_cpu_state(void)
+{
+   fpsimd_save();
+   fpsimd_flush_cpu_state();
+}
+
 #ifdef CONFIG_KERNEL_MODE_NEON
 
 DEFINE_PER_CPU(bool, kernel_neon_busy);
@@ -1284,8 +1294,7 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block 
*self,
 {
switch (cmd) {
case CPU_PM_ENTER:
-   fpsimd_save();
-   fpsimd_flush_cpu_state();
+   fpsimd_save_and_flush_cpu_state();
break;
case CPU_PM_EXIT:
break;
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 6e3c9c8b2df9..525010504f9d 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -112,9 +112,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1];
 
-   /* Clean guest FP state to memory and invalidate cpu view */
-   fpsimd_save();
-   fpsimd_flush_cpu_state();
+   fpsimd_save_and_flush_cpu_state();
 
if (guest_has_sve)
*guest_zcr = read_sysreg_s(SYS_ZCR_EL12);
-- 
2.11.0



[PATCH v5 3/3] arm64/fpsimd: Don't disable softirq when touching FPSIMD/SVE state

2019-05-21 Thread Julien Grall
When the kernel is compiled with CONFIG_KERNEL_MODE_NEON, some parts of
the kernel may be able to use FPSIMD/SVE. This is for instance the case
for crypto code.

Any use of FPSIMD/SVE in the kernel is clearly marked by using the
functions kernel_neon_{begin, end}. Furthermore, these can only be used
when may_use_simd() returns true.

The current implementation of may_use_simd() allows softirqs to use
FPSIMD/SVE unless it is currently in use (i.e. kernel_neon_busy is true).
When in use, softirqs usually fall back to a software method.

At the moment, as a softirq may use FPSIMD/SVE, softirqs are disabled
when touching the FPSIMD/SVE context. This has the drawback of disabling
all softirqs even if they are not using FPSIMD/SVE.

Since a softirq is supposed to check may_use_simd() anyway before
attempting to use FPSIMD/SVE, there is limited reason to keep softirqs
disabled when touching the FPSIMD/SVE context. Instead, we can simply
disable preemption and mark the FPSIMD/SVE context as in use by setting
the CPU's fpsimd_context_busy flag.

Two new helpers, {get, put}_cpu_fpsimd_context, are introduced to mark
the region using the FPSIMD/SVE context, and they are used to replace
local_bh_{disable, enable}. The functions kernel_neon_{begin, end} are
also re-implemented to use the new helpers.

Additionally, double-underscored versions of the helpers are provided to
be called when preemption is already disabled. These are only relevant on
paths where irqs are disabled anyway, so they are not needed for
correctness in the current code. Let's use them anyway though: this
marks critical sections clearly and will help to avoid mistakes during
future maintenance.
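
For context, the expected calling pattern for kernel-mode FPSIMD users is
unchanged by this patch; a minimal sketch (not taken from the patch, the
function name is made up) looks like:

#include <linux/types.h>
#include <asm/neon.h>
#include <asm/simd.h>

/* Hypothetical caller, for illustration only. */
static void my_accelerated_op(void *dst, const void *src, size_t len)
{
        if (may_use_simd()) {
                kernel_neon_begin();
                /* ... FPSIMD/SVE accelerated path ... */
                kernel_neon_end();
        } else {
                /*
                 * ... scalar fallback, e.g. for a softirq that finds
                 * fpsimd_context_busy set ...
                 */
        }
}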

The change has been benchmarked on Linux 5.1-rc4 with defconfig.

On Juno2:
* hackbench 100 process 1000 (10 times)
* 0.7% quicker

On ThunderX 2:
* hackbench 1000 process 1000 (20 times)
* 3.4% quicker

Signed-off-by: Julien Grall 
Reviewed-by: Dave Martin 

---
Changes in v5:
- Update commit message
- Add Dave's reviewed-by

Changes in v4:
- Clarify the comment on top of get_cpu_fpsimd_context()
- Use double-underscore version in fpsimd_save_and_flush_cpu_state()

Changes in v3:
- Fix typoes in the commit message
- Rework a bit the commit message
- Use imperative mood
- Rename kernel_neon_busy to fpsimd_context_busy
- Remove debug code
- Update comments
- Don't require preemption when calling 
fpsimd_save_and_flush_cpu_state()

Changes in v2:
- Remove spurious call to kernel_neon_enable in kernel_neon_begin.
- Rename kernel_neon_{enable, disable} to {get, put}_cpu_fpsimd_context
- Introduce a double-underscore version of the helpers for case
where preemption is already disabled
- Introduce have_cpu_fpsimd_context() and use it in WARN_ON(...)
- Surround more places in the code with the new helpers
- Rework the comments
- Update the commit message with the benchmark result
---
 arch/arm64/include/asm/simd.h |  10 ++--
 arch/arm64/kernel/fpsimd.c| 124 --
 2 files changed, 89 insertions(+), 45 deletions(-)

diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index 6495cc51246f..a6307e43b8c2 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -15,9 +15,9 @@
 #include 
 #include 
 
-#ifdef CONFIG_KERNEL_MODE_NEON
+DECLARE_PER_CPU(bool, fpsimd_context_busy);
 
-DECLARE_PER_CPU(bool, kernel_neon_busy);
+#ifdef CONFIG_KERNEL_MODE_NEON
 
 /*
  * may_use_simd - whether it is allowable at this time to issue SIMD
@@ -29,15 +29,15 @@ DECLARE_PER_CPU(bool, kernel_neon_busy);
 static __must_check inline bool may_use_simd(void)
 {
/*
-* kernel_neon_busy is only set while preemption is disabled,
+* fpsimd_context_busy is only set while preemption is disabled,
 * and is clear whenever preemption is enabled. Since
-* this_cpu_read() is atomic w.r.t. preemption, kernel_neon_busy
+* this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy
 * cannot change under our feet -- if it's set we cannot be
 * migrated, and if it's clear we cannot be migrated to a CPU
 * where it is set.
 */
return !in_irq() && !irqs_disabled() && !in_nmi() &&
-   !this_cpu_read(kernel_neon_busy);
+   !this_cpu_read(fpsimd_context_busy);
 }
 
 #else /* ! CONFIG_KERNEL_MODE_NEON */
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 6448921a2f59..c7c454df2779 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -92,7 +92,8 @@
  * To prevent this from racing with the manipulation of the task's FPSIMD state
  * from task context and thereby corrupting the state, it is necessary to
  * protect any manipulation of a task's fp

Re: [PATCH v5 3/3] arm64/fpsimd: Don't disable softirq when touching FPSIMD/SVE state

2019-06-04 Thread Julien Grall

Hi Catalin,

On 6/3/19 10:21 PM, Catalin Marinas wrote:

On Mon, Jun 03, 2019 at 05:25:34PM +0100, Catalin Marinas wrote:

On Tue, May 21, 2019 at 06:21:39PM +0100, Julien Grall wrote:

Since a softirq is supposed to check may_use_simd() anyway before
attempting to use FPSIMD/SVE, there is limited reason to keep softirq
disabled when touching the FPSIMD/SVE context. Instead, we can simply
disable preemption and mark the FPSIMD/SVE context as in use by setting
CPU's fpsimd_context_busy flag.

[...]

+static void get_cpu_fpsimd_context(void)
+{
+   preempt_disable();
+   __get_cpu_fpsimd_context();
+}


Is there anything that prevents a softirq being invoked between
preempt_disable() and __get_cpu_fpsimd_context()?


Actually, it shouldn't matter, as the softirq finishes using FPSIMD
before the thread is resumed.


If the softirqs are handled in a thread (i.e. ksoftirqd), then 
preempt_disable() will prevent them from running.


Softirqs running on return from interrupt context will finish using 
FPSIMD before the thread is resumed.


A softirq running after __get_cpu_fpsimd_context() is called will not be 
able to use FPSIMD (may_use_simd() returns false).
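
To illustrate the discipline described above, a softirq wanting to use
FPSIMD/SVE is expected to follow a pattern along these lines (a sketch;
do_crypto_work() and the scalar fallback are placeholders, not code from
the patch):

#include <asm/neon.h>
#include <asm/simd.h>

static void do_crypto_work(void)
{
        if (may_use_simd()) {
                kernel_neon_begin();
                /* ... FPSIMD/SVE accelerated path ... */
                kernel_neon_end();
        } else {
                /* ... scalar/software fallback ... */
        }
}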


Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


KVM Arm Device passthrough and linux-rt

2019-06-04 Thread Julien Grall
Hi,

While trying device passthrough on Linux-rt with KVM Arm, I had
the following splat.

[  363.410141] 000: BUG: sleeping function called from invalid context at 
kernel/locking/rtmutex.c:974
[  363.410150] 000: in_atomic(): 0, irqs_disabled(): 128, pid: 2916, name: 
qemu-system-aar
[  363.410153] 000: 4 locks held by qemu-system-aar/2916:
[  363.410157] 000:  #0: 8007bd248100 (&vcpu->mutex){+.+.}, at: 
kvm_vcpu_ioctl+0x70/0xae0
[  363.410171] 000:  #1: 8007bd1e2b20 (&kvm->irq_srcu){}, at: 
kvm_notify_acked_irq+0x7c/0x300
[  363.410179] 000:  #2: 8007bd1e2b20 (&kvm->irq_srcu){}, at: 
irqfd_resampler_ack+0x0/0xd8
[  363.410187] 000:  #3: 8007c2b27d28 (&ctx->wqh#2){+.+.}, at: 
eventfd_signal+0x24/0x78
[  363.410196] 000: irq event stamp: 4033894
[  363.410197] 000: hardirqs last  enabled at (4033893): [] 
_raw_spin_unlock_irqrestore+0x88/0x90
[  363.410203] 000: hardirqs last disabled at (4033894): [] 
kvm_arch_vcpu_ioctl_run+0x2a8/0xc08
[  363.410207] 000: softirqs last  enabled at (0): [] 
copy_process.isra.1.part.2+0x8d8/0x1958
[  363.410212] 000: softirqs last disabled at (0): [<>]  (null)
[  363.410216] 000: CPU: 0 PID: 2916 Comm: qemu-system-aar Tainted: GW  
 5.0.14-rt9-00013-g4b2a13c8a804 #84
[  363.410219] 000: Hardware name: AMD Seattle (Rev.B0) Development Board 
(Overdrive) (DT)
[  363.410221] 000: Call trace:
[  363.410222] 000:  dump_backtrace+0x0/0x158
[  363.410225] 000:  show_stack+0x14/0x20
[  363.410227] 000:  dump_stack+0xa0/0xd4
[  363.410230] 000:  ___might_sleep+0x16c/0x1f8
[  363.410234] 000:  rt_spin_lock+0x5c/0x70
[  363.410237] 000:  eventfd_signal+0x24/0x78
[  363.410238] 000:  irqfd_resampler_ack+0x94/0xd8
[  363.410241] 000:  kvm_notify_acked_irq+0xf8/0x300
[  363.410243] 000:  vgic_v2_fold_lr_state+0x174/0x1e0
[  363.410246] 000:  kvm_vgic_sync_hwstate+0x5c/0x2b8
[  363.410249] 000:  kvm_arch_vcpu_ioctl_run+0x624/0xc08
[  363.410250] 000:  kvm_vcpu_ioctl+0x3a0/0xae0
[  363.410252] 000:  do_vfs_ioctl+0xbc/0x910
[  363.410255] 000:  ksys_ioctl+0x78/0xa8
[  363.410257] 000:  __arm64_sys_ioctl+0x1c/0x28
[  363.410260] 000:  el0_svc_common+0x90/0x118
[  363.410263] 000:  el0_svc_handler+0x2c/0x80
[  363.410265] 000:  el0_svc+0x8/0xc

This is happening because vgic_v2_fold_lr_state() is expected
to be called with interrupts disabled. However, some of the paths
(e.g. eventfd) will take a spinlock.

The spinlock is from the waitqueue, so using a raw_spin_lock cannot
even be considered.
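
For reference, the conflicting constraints boil down to the pattern below
(a sketch only, not the actual vgic code): eventfd_signal() takes
ctx->wqh.lock, a spinlock_t, which is a sleeping rt_mutex on PREEMPT_RT, so
it must not be called with interrupts disabled:

#include <linux/eventfd.h>
#include <linux/irqflags.h>

static void ack_with_irqs_off(struct eventfd_ctx *ctx)
{
        unsigned long flags;

        local_irq_save(flags);          /* the vgic sync path runs like this */
        eventfd_signal(ctx, 1);         /* might_sleep() fires on linux-rt */
        local_irq_restore(flags);
}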

Do you have any input on how this could be solved?

Cheers,

-- 
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH RFC 11/14] arm64: Move the ASID allocator code in a separate file

2019-06-05 Thread Julien Grall

Hi,

I am CCing RISC-V folks to see if there is an interest in sharing the code.

@RISC-V: I noticed you are discussing importing a version of the ASID 
allocator in RISC-V. At first look, the code looks quite similar. Would the 
library below help you?
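
To give an idea, wiring the library up in another user would look roughly
like the sketch below. All the "my_*" names are made up; only
asid_allocator_init(), asid_check_context() and the asid_info fields come
from the patch:

#include <linux/percpu.h>
#include <asm/asid.h>
#include <asm/tlbflush.h>

static struct asid_info my_info;
static DEFINE_PER_CPU(atomic64_t, my_active);
static DEFINE_PER_CPU(u64, my_reserved);

static void my_flush_cpu_ctxt(void)
{
        local_flush_tlb_all();          /* whatever the user needs locally */
}

static int __init my_allocator_init(void)
{
        my_info.active = &my_active;
        my_info.reserved = &my_reserved;

        /* 16-bit IDs, 1 ID allocated per context */
        return asid_allocator_init(&my_info, 16, 1, my_flush_cpu_ctxt);
}

/* Fast path, called with preemption disabled (e.g. on context switch): */
static void my_check_context(atomic64_t *pasid, unsigned int cpu)
{
        asid_check_context(&my_info, pasid, cpu);
}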


Cheers,

On 21/03/2019 16:36, Julien Grall wrote:

We will want to re-use the ASID allocator in a separate context (e.g
allocating VMID). So move the code in a new file.

The function asid_check_context has been moved into the header as a static
inline function because we want to avoid adding a branch when checking if the
ASID is still valid.

Signed-off-by: Julien Grall 

---

This code will be used in the virt code for allocating VMIDs. I am not
entirely sure where to place it. Lib could potentially be a good place, but I
am not entirely convinced the algorithm as it is could be used by other
architectures.

Looking at x86, it seems that it will not be possible to re-use the code
because the number of PCIDs (aka ASIDs) could be smaller than the number of
CPUs. See commit 10af6235e0d327d42e1bad974385197817923dc1 "x86/mm:
Implement PCID based optimization: try to preserve old TLB entries using
PCID".
---
  arch/arm64/include/asm/asid.h |  77 ++
  arch/arm64/lib/Makefile   |   2 +
  arch/arm64/lib/asid.c | 185 +
  arch/arm64/mm/context.c   | 235 +-
  4 files changed, 267 insertions(+), 232 deletions(-)
  create mode 100644 arch/arm64/include/asm/asid.h
  create mode 100644 arch/arm64/lib/asid.c

diff --git a/arch/arm64/include/asm/asid.h b/arch/arm64/include/asm/asid.h
new file mode 100644
index ..bb62b587f37f
--- /dev/null
+++ b/arch/arm64/include/asm/asid.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ASM_ASID_H
+#define __ASM_ASM_ASID_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct asid_info
+{
+   atomic64_t  generation;
+   unsigned long   *map;
+   atomic64_t __percpu *active;
+   u64 __percpu*reserved;
+   u32 bits;
+   /* Lock protecting the structure */
+   raw_spinlock_t  lock;
+   /* Which CPU requires context flush on next call */
+   cpumask_t   flush_pending;
+   /* Number of ASID allocated by context (shift value) */
+   unsigned intctxt_shift;
+   /* Callback to locally flush the context. */
+   void(*flush_cpu_ctxt_cb)(void);
+};
+
+#define NUM_ASIDS(info)(1UL << ((info)->bits))
+#define NUM_CTXT_ASIDS(info)   (NUM_ASIDS(info) >> (info)->ctxt_shift)
+
+#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
+
+void asid_new_context(struct asid_info *info, atomic64_t *pasid,
+ unsigned int cpu);
+
+/*
+ * Check the ASID is still valid for the context. If not generate a new ASID.
+ *
+ * @pasid: Pointer to the current ASID batch
+ * @cpu: current CPU ID. Must have been acquired throught get_cpu()
+ */
+static inline void asid_check_context(struct asid_info *info,
+ atomic64_t *pasid, unsigned int cpu)
+{
+   u64 asid, old_active_asid;
+
+   asid = atomic64_read(pasid);
+
+   /*
+* The memory ordering here is subtle.
+* If our active_asid is non-zero and the ASID matches the current
+* generation, then we update the active_asid entry with a relaxed
+* cmpxchg. Racing with a concurrent rollover means that either:
+*
+* - We get a zero back from the cmpxchg and end up waiting on the
+*   lock. Taking the lock synchronises with the rollover and so
+*   we are forced to see the updated generation.
+*
+* - We get a valid ASID back from the cmpxchg, which means the
+*   relaxed xchg in flush_context will treat us as reserved
+*   because atomic RmWs are totally ordered for a given location.
+*/
+   old_active_asid = atomic64_read(&active_asid(info, cpu));
+   if (old_active_asid &&
+   !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
+   atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
+old_active_asid, asid))
+   return;
+
+   asid_new_context(info, pasid, cpu);
+}
+
+int asid_allocator_init(struct asid_info *info,
+   u32 bits, unsigned int asid_per_ctxt,
+   void (*flush_cpu_ctxt_cb)(void));
+
+#endif
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 5540a1638baf..720df5ee2aa2 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -5,6 +5,8 @@ lib-y   := clear_user.o delay.o copy_from_user.o
\
   memcmp.o strcmp.o strncmp.o strlen.o strnlen.o   \
   

Re: [PATCH RFC 11/14] arm64: Move the ASID allocator code in a separate file

2019-06-19 Thread Julien Grall




On 6/19/19 9:07 AM, Guo Ren wrote:

Hi Julien,


Hi,



You forgot CCing C-SKY folks :P


I wasn't aware you could be interested :).



Moving the arm ASID allocator code into a generic one is a good idea. I've
made a patchset for C-SKY and testing is in progress, see:
https://lore.kernel.org/linux-csky/1560930553-26502-1-git-send-email-guo...@kernel.org/

If you plan to separate it into a generic one, I could work with you.


Did the ASID allocator work out of the box on C-SKY? If so, I can easily 
move the code to a generic place (maybe lib/asid.c).


Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH RFC 11/14] arm64: Move the ASID allocator code in a separate file

2019-06-19 Thread Julien Grall

Hi Guo,

On 19/06/2019 12:51, Guo Ren wrote:

On Wed, Jun 19, 2019 at 4:54 PM Julien Grall  wrote:




On 6/19/19 9:07 AM, Guo Ren wrote:

Hi Julien,


Hi,



You forgot CCing C-SKY folks :P


I wasn't aware you could be interested :).



Moving the arm ASID allocator code into a generic one is a good idea. I've
made a patchset for C-SKY and testing is in progress, see:
https://lore.kernel.org/linux-csky/1560930553-26502-1-git-send-email-guo...@kernel.org/

If you plan to separate it into a generic one, I could work with you.


Did the ASID allocator work out of the box on C-SKY?

Almost done, but one question:
arm64 removed the code in switch_mm:
   cpumask_clear_cpu(cpu, mm_cpumask(prev));
   cpumask_set_cpu(cpu, mm_cpumask(next));





Why? Although arm64 cache operations could affect all harts with the CTC
method of interconnect, I think we should keep this code for primitive
integrity in Linux, because cpu_bitmap is in mm_struct instead of
mm->context.


I will let Will answer to this.

[...]


If so, I can easily move the code in a generic place (maybe lib/asid.c).

I think it's OK.


Will raised concerns about moving the code into lib. So I will stick with 
what I currently have.


Cheers,

--
Julien Grall


[RFC v2 01/14] arm64/mm: Introduce asid_info structure and move asid_generation/asid_map to it

2019-06-20 Thread Julien Grall
In an attempt to make the ASID allocator generic, create a new structure
asid_info to store all the information necessary for the allocator.

For now, move the variables asid_generation and asid_map to the new structure
asid_info. Follow-up patches will move more variables.

Note that to avoid more renaming afterwards, a local variable 'info' has been
created and is a pointer to the ASID allocator structure.

Signed-off-by: Julien Grall 

---
Changes in v2:
- Turn asid_info into a static variable
---
 arch/arm64/mm/context.c | 46 ++
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 1f0ea2facf24..8167c369172d 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -30,8 +30,11 @@
 static u32 asid_bits;
 static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 
-static atomic64_t asid_generation;
-static unsigned long *asid_map;
+static struct asid_info
+{
+   atomic64_t  generation;
+   unsigned long   *map;
+} asid_info;
 
 static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
@@ -88,13 +91,13 @@ void verify_cpu_asid_bits(void)
}
 }
 
-static void flush_context(void)
+static void flush_context(struct asid_info *info)
 {
int i;
u64 asid;
 
/* Update the list of reserved ASIDs and the ASID bitmap. */
-   bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
+   bitmap_clear(info->map, 0, NUM_USER_ASIDS);
 
for_each_possible_cpu(i) {
asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
@@ -107,7 +110,7 @@ static void flush_context(void)
 */
if (asid == 0)
asid = per_cpu(reserved_asids, i);
-   __set_bit(asid2idx(asid), asid_map);
+   __set_bit(asid2idx(asid), info->map);
per_cpu(reserved_asids, i) = asid;
}
 
@@ -142,11 +145,11 @@ static bool check_update_reserved_asid(u64 asid, u64 
newasid)
return hit;
 }
 
-static u64 new_context(struct mm_struct *mm)
+static u64 new_context(struct asid_info *info, struct mm_struct *mm)
 {
static u32 cur_idx = 1;
u64 asid = atomic64_read(&mm->context.id);
-   u64 generation = atomic64_read(&asid_generation);
+   u64 generation = atomic64_read(&info->generation);
 
if (asid != 0) {
u64 newasid = generation | (asid & ~ASID_MASK);
@@ -162,7 +165,7 @@ static u64 new_context(struct mm_struct *mm)
 * We had a valid ASID in a previous life, so try to re-use
 * it if possible.
 */
-   if (!__test_and_set_bit(asid2idx(asid), asid_map))
+   if (!__test_and_set_bit(asid2idx(asid), info->map))
return newasid;
}
 
@@ -173,20 +176,20 @@ static u64 new_context(struct mm_struct *mm)
 * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
 * pairs.
 */
-   asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
+   asid = find_next_zero_bit(info->map, NUM_USER_ASIDS, cur_idx);
if (asid != NUM_USER_ASIDS)
goto set_asid;
 
/* We're out of ASIDs, so increment the global generation count */
generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION,
-&asid_generation);
-   flush_context();
+&info->generation);
+   flush_context(info);
 
/* We have more ASIDs than CPUs, so this will always succeed */
-   asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
+   asid = find_next_zero_bit(info->map, NUM_USER_ASIDS, 1);
 
 set_asid:
-   __set_bit(asid, asid_map);
+   __set_bit(asid, info->map);
cur_idx = asid;
return idx2asid(asid) | generation;
 }
@@ -195,6 +198,7 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
 {
unsigned long flags;
u64 asid, old_active_asid;
+   struct asid_info *info = &asid_info;
 
if (system_supports_cnp())
cpu_set_reserved_ttbr0();
@@ -217,7 +221,7 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
 */
old_active_asid = atomic64_read(&per_cpu(active_asids, cpu));
if (old_active_asid &&
-   !((asid ^ atomic64_read(&asid_generation)) >> asid_bits) &&
+   !((asid ^ atomic64_read(&info->generation)) >> asid_bits) &&
atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu),
 old_active_asid, asid))
goto switch_mm_fastpath;
@@ -225,8 +229,8 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
raw_spin_lock_

[RFC v2 03/14] arm64/mm: Move bits to asid_info

2019-06-20 Thread Julien Grall
The variable bits holds information for a given ASID allocator. So move
it to the asid_info structure.

Because most of the macros were relying on bits, they are now taking an
extra parameter that is a pointer to the asid_info structure.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 59 +
 1 file changed, 30 insertions(+), 29 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 6bacfc295f6e..7883347ece52 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -27,7 +27,6 @@
 #include 
 #include 
 
-static u32 asid_bits;
 static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 
 static struct asid_info
@@ -36,6 +35,7 @@ static struct asid_info
unsigned long   *map;
atomic64_t __percpu *active;
u64 __percpu*reserved;
+   u32 bits;
 } asid_info;
 
 #define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
@@ -46,17 +46,17 @@ static DEFINE_PER_CPU(u64, reserved_asids);
 
 static cpumask_t tlb_flush_pending;
 
-#define ASID_MASK  (~GENMASK(asid_bits - 1, 0))
-#define ASID_FIRST_VERSION (1UL << asid_bits)
+#define ASID_MASK(info)(~GENMASK((info)->bits - 1, 0))
+#define ASID_FIRST_VERSION(info)   (1UL << ((info)->bits))
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-#define NUM_USER_ASIDS (ASID_FIRST_VERSION >> 1)
-#define asid2idx(asid) (((asid) & ~ASID_MASK) >> 1)
-#define idx2asid(idx)  (((idx) << 1) & ~ASID_MASK)
+#define NUM_USER_ASIDS(info)   (ASID_FIRST_VERSION(info) >> 1)
+#define asid2idx(info, asid)   (((asid) & ~ASID_MASK(info)) >> 1)
+#define idx2asid(info, idx)(((idx) << 1) & ~ASID_MASK(info))
 #else
-#define NUM_USER_ASIDS (ASID_FIRST_VERSION)
-#define asid2idx(asid) ((asid) & ~ASID_MASK)
-#define idx2asid(idx)  asid2idx(idx)
+#define NUM_USER_ASIDS(info)   (ASID_FIRST_VERSION(info))
+#define asid2idx(info, asid)   ((asid) & ~ASID_MASK(info))
+#define idx2asid(info, idx)asid2idx(info, idx)
 #endif
 
 /* Get the ASIDBits supported by the current CPU */
@@ -86,13 +86,13 @@ void verify_cpu_asid_bits(void)
 {
u32 asid = get_cpu_asid_bits();
 
-   if (asid < asid_bits) {
+   if (asid < asid_info.bits) {
/*
 * We cannot decrease the ASID size at runtime, so panic if we 
support
 * fewer ASID bits than the boot CPU.
 */
pr_crit("CPU%d: smaller ASID size(%u) than boot CPU (%u)\n",
-   smp_processor_id(), asid, asid_bits);
+   smp_processor_id(), asid, asid_info.bits);
cpu_panic_kernel();
}
 }
@@ -103,7 +103,7 @@ static void flush_context(struct asid_info *info)
u64 asid;
 
/* Update the list of reserved ASIDs and the ASID bitmap. */
-   bitmap_clear(info->map, 0, NUM_USER_ASIDS);
+   bitmap_clear(info->map, 0, NUM_USER_ASIDS(info));
 
for_each_possible_cpu(i) {
asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
@@ -116,7 +116,7 @@ static void flush_context(struct asid_info *info)
 */
if (asid == 0)
asid = reserved_asid(info, i);
-   __set_bit(asid2idx(asid), info->map);
+   __set_bit(asid2idx(info, asid), info->map);
reserved_asid(info, i) = asid;
}
 
@@ -159,7 +159,7 @@ static u64 new_context(struct asid_info *info, struct 
mm_struct *mm)
u64 generation = atomic64_read(&info->generation);
 
if (asid != 0) {
-   u64 newasid = generation | (asid & ~ASID_MASK);
+   u64 newasid = generation | (asid & ~ASID_MASK(info));
 
/*
 * If our current ASID was active during a rollover, we
@@ -172,7 +172,7 @@ static u64 new_context(struct asid_info *info, struct 
mm_struct *mm)
 * We had a valid ASID in a previous life, so try to re-use
 * it if possible.
 */
-   if (!__test_and_set_bit(asid2idx(asid), info->map))
+   if (!__test_and_set_bit(asid2idx(info, asid), info->map))
return newasid;
}
 
@@ -183,22 +183,22 @@ static u64 new_context(struct asid_info *info, struct 
mm_struct *mm)
 * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
 * pairs.
 */
-   asid = find_next_zero_bit(info->map, NUM_USER_ASIDS, cur_idx);
-   if (asid != NUM_USER_ASIDS)
+   asid = find_next_zero_bit(info->map, NUM_USER_ASIDS(info), cur_idx);
+   if (asid != NUM_USER_ASIDS(info))
goto set_asid;
 
/* We're 

[RFC v2 07/14] arm64/mm: Introduce NUM_ASIDS

2019-06-20 Thread Julien Grall
At the moment ASID_FIRST_VERSION is used to know the number of ASIDs
supported. As we are going to move the ASID allocator into a separate file, it
would be better to use a different name for external users.

This patch adds NUM_ASIDS and implements ASID_FIRST_VERSION using it.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index d128f02644b0..beba8e5b4100 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -48,7 +48,9 @@ static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
 
 #define ASID_MASK(info)(~GENMASK((info)->bits - 1, 0))
-#define ASID_FIRST_VERSION(info)   (1UL << ((info)->bits))
+#define NUM_ASIDS(info)(1UL << ((info)->bits))
+
+#define ASID_FIRST_VERSION(info)   NUM_ASIDS(info)
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 #define ASID_PER_CONTEXT   2
@@ -56,7 +58,7 @@ static DEFINE_PER_CPU(u64, reserved_asids);
 #define ASID_PER_CONTEXT   1
 #endif
 
-#define NUM_CTXT_ASIDS(info)   (ASID_FIRST_VERSION(info) >> 
(info)->ctxt_shift)
+#define NUM_CTXT_ASIDS(info)   (NUM_ASIDS(info) >> (info)->ctxt_shift)
 #define asid2idx(info, asid)   (((asid) & ~ASID_MASK(info)) >> 
(info)->ctxt_shift)
 #define idx2asid(info, idx)(((idx) << (info)->ctxt_shift) & 
~ASID_MASK(info))
 
-- 
2.11.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC v2 00/14] kvm/arm: Align the VMID allocation with the arm64 ASID one

2019-06-20 Thread Julien Grall
Hi all,

This patch series moves the ASID allocator out into a separate file in order
to re-use it for the VMID. The benefits are:
- CPUs are not forced to exit on a roll-over.
- Context invalidation is now per-CPU rather than
  broadcasted.

There is no performance regression on the fastpath for ASID allocation.
Actually, on the hackbench measurement (300 hackbench) it was 0.7% faster.

The measurement was made on a Seattle-based SoC (8 CPUs), with the
number of VMIDs limited to 4 bits. The test involves running 40 guests
with 2 vCPUs concurrently. Each guest executes hackbench 5 times
before exiting.

The performance differences (on 5.1-rc1) between the current algorithm and the
new one are:
- 2.5% less exit from the guest
- 22.4% more flush, although they are now local rather than broadcasted
- 0.11% faster (just for the record)

The ASID allocator rework to make it generic has been divided into multiple
patches to make the review easier.

Compared to the first RFC, arm no longer duplicates most of the code.
Instead, arm will build the version from arm64.

A branch with the patch based on 5.2-rc5 can be found:

http://xenbits.xen.org/gitweb/?p=people/julieng/linux-arm.git;a=shortlog;h=refs/heads/vmid-rework/rfc-v2

Best regards,

Cc: Russell King 

Julien Grall (14):
  arm64/mm: Introduce asid_info structure and move
asid_generation/asid_map to it
  arm64/mm: Move active_asids and reserved_asids to asid_info
  arm64/mm: Move bits to asid_info
  arm64/mm: Move the variable lock and tlb_flush_pending to asid_info
  arm64/mm: Remove dependency on MM in new_context
  arm64/mm: Store the number of asid allocated per context
  arm64/mm: Introduce NUM_ASIDS
  arm64/mm: Split asid_inits in 2 parts
  arm64/mm: Split the function check_and_switch_context in 3 parts
  arm64/mm: Introduce a callback to flush the local context
  arm64: Move the ASID allocator code in a separate file
  arm64/lib: asid: Allow user to update the context under the lock
  arm/kvm: Introduce a new VMID allocator
  kvm/arm: Align the VMID allocation with the arm64 ASID one

 arch/arm/include/asm/kvm_asm.h|   2 +-
 arch/arm/include/asm/kvm_host.h   |   5 +-
 arch/arm/include/asm/kvm_hyp.h|   1 +
 arch/arm/include/asm/lib_asid.h   |  81 +++
 arch/arm/kvm/Makefile |   1 +
 arch/arm/kvm/hyp/tlb.c|   8 +-
 arch/arm64/include/asm/kvm_asid.h |   8 ++
 arch/arm64/include/asm/kvm_asm.h  |   2 +-
 arch/arm64/include/asm/kvm_host.h |   5 +-
 arch/arm64/include/asm/lib_asid.h |  81 +++
 arch/arm64/kvm/hyp/tlb.c  |  10 +-
 arch/arm64/lib/Makefile   |   2 +
 arch/arm64/lib/asid.c | 191 +++
 arch/arm64/mm/context.c   | 205 ++
 virt/kvm/arm/arm.c| 112 +++--
 15 files changed, 447 insertions(+), 267 deletions(-)
 create mode 100644 arch/arm/include/asm/lib_asid.h
 create mode 100644 arch/arm64/include/asm/kvm_asid.h
 create mode 100644 arch/arm64/include/asm/lib_asid.h
 create mode 100644 arch/arm64/lib/asid.c

-- 
2.11.0



[RFC v2 04/14] arm64/mm: Move the variable lock and tlb_flush_pending to asid_info

2019-06-20 Thread Julien Grall
The variables lock and tlb_flush_pending hold information for a given
ASID allocator. So move them to the asid_info structure.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 7883347ece52..6457a9310fe4 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -27,8 +27,6 @@
 #include 
 #include 
 
-static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-
 static struct asid_info
 {
atomic64_t  generation;
@@ -36,6 +34,9 @@ static struct asid_info
atomic64_t __percpu *active;
u64 __percpu*reserved;
u32 bits;
+   raw_spinlock_t  lock;
+   /* Which CPU requires context flush on next call */
+   cpumask_t   flush_pending;
 } asid_info;
 
 #define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
@@ -44,8 +45,6 @@ static struct asid_info
 static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
 
-static cpumask_t tlb_flush_pending;
-
 #define ASID_MASK(info)(~GENMASK((info)->bits - 1, 0))
 #define ASID_FIRST_VERSION(info)   (1UL << ((info)->bits))
 
@@ -124,7 +123,7 @@ static void flush_context(struct asid_info *info)
 * Queue a TLB invalidation for each CPU to perform on next
 * context-switch
 */
-   cpumask_setall(&tlb_flush_pending);
+   cpumask_setall(&info->flush_pending);
 }
 
 static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
@@ -233,7 +232,7 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
 old_active_asid, asid))
goto switch_mm_fastpath;
 
-   raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+   raw_spin_lock_irqsave(&info->lock, flags);
/* Check that our ASID belongs to the current generation. */
asid = atomic64_read(&mm->context.id);
if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
@@ -241,11 +240,11 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
atomic64_set(&mm->context.id, asid);
}
 
-   if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
+   if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
local_flush_tlb_all();
 
atomic64_set(&active_asid(info, cpu), asid);
-   raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+   raw_spin_unlock_irqrestore(&info->lock, flags);
 
 switch_mm_fastpath:
 
@@ -288,6 +287,8 @@ static int asids_init(void)
info->active = &active_asids;
info->reserved = &reserved_asids;
 
+   raw_spin_lock_init(&info->lock);
+
pr_info("ASID allocator initialised with %lu entries\n",
NUM_USER_ASIDS(info));
return 0;
-- 
2.11.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC v2 06/14] arm64/mm: Store the number of asid allocated per context

2019-06-20 Thread Julien Grall
Currently the number of ASIDs allocated per context is determined at
compilation time. As the algorithm is becoming generic, the user may
want to instantiate the ASID allocator multiple times with different
numbers of ASIDs allocated.

Add a field in asid_info to track the number of ASIDs allocated per context.
This is stored in terms of a shift amount to avoid divisions in the code.

This means the number of ASID allocated per context should be a power of
two.

At the same time rename NUM_USER_ASIDS to NUM_CTXT_ASIDS to make the
name more generic.
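
As an illustration of the shift arithmetic (a sketch, not part of the patch;
the numbers assume 16-bit ASIDs and 2 ASIDs per context, i.e. the KPTI case):

#include <linux/log2.h>
#include <linux/types.h>

/* With asid_per_ctxt = 2, ctxt_shift = ilog2(2) = 1, so 65536 ASIDs
 * provide 32768 contexts; asid_per_ctxt must be a power of two for
 * ilog2() to be exact. */
static unsigned long example_num_ctxt_asids(u32 bits, unsigned int asid_per_ctxt)
{
        unsigned int ctxt_shift = ilog2(asid_per_ctxt);

        return (1UL << bits) >> ctxt_shift;
}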

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 31 +--
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index a9cc59288b08..d128f02644b0 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -37,6 +37,8 @@ static struct asid_info
raw_spinlock_t  lock;
/* Which CPU requires context flush on next call */
cpumask_t   flush_pending;
+   /* Number of ASID allocated by context (shift value) */
+   unsigned intctxt_shift;
 } asid_info;
 
 #define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
@@ -49,15 +51,15 @@ static DEFINE_PER_CPU(u64, reserved_asids);
 #define ASID_FIRST_VERSION(info)   (1UL << ((info)->bits))
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-#define NUM_USER_ASIDS(info)   (ASID_FIRST_VERSION(info) >> 1)
-#define asid2idx(info, asid)   (((asid) & ~ASID_MASK(info)) >> 1)
-#define idx2asid(info, idx)(((idx) << 1) & ~ASID_MASK(info))
+#define ASID_PER_CONTEXT   2
 #else
-#define NUM_USER_ASIDS(info)   (ASID_FIRST_VERSION(info))
-#define asid2idx(info, asid)   ((asid) & ~ASID_MASK(info))
-#define idx2asid(info, idx)asid2idx(info, idx)
+#define ASID_PER_CONTEXT   1
 #endif
 
+#define NUM_CTXT_ASIDS(info)   (ASID_FIRST_VERSION(info) >> 
(info)->ctxt_shift)
+#define asid2idx(info, asid)   (((asid) & ~ASID_MASK(info)) >> 
(info)->ctxt_shift)
+#define idx2asid(info, idx)(((idx) << (info)->ctxt_shift) & 
~ASID_MASK(info))
+
 /* Get the ASIDBits supported by the current CPU */
 static u32 get_cpu_asid_bits(void)
 {
@@ -102,7 +104,7 @@ static void flush_context(struct asid_info *info)
u64 asid;
 
/* Update the list of reserved ASIDs and the ASID bitmap. */
-   bitmap_clear(info->map, 0, NUM_USER_ASIDS(info));
+   bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info));
 
for_each_possible_cpu(i) {
asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
@@ -182,8 +184,8 @@ static u64 new_context(struct asid_info *info, atomic64_t 
*pasid)
 * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
 * pairs.
 */
-   asid = find_next_zero_bit(info->map, NUM_USER_ASIDS(info), cur_idx);
-   if (asid != NUM_USER_ASIDS(info))
+   asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx);
+   if (asid != NUM_CTXT_ASIDS(info))
goto set_asid;
 
/* We're out of ASIDs, so increment the global generation count */
@@ -192,7 +194,7 @@ static u64 new_context(struct asid_info *info, atomic64_t 
*pasid)
flush_context(info);
 
/* We have more ASIDs than CPUs, so this will always succeed */
-   asid = find_next_zero_bit(info->map, NUM_USER_ASIDS(info), 1);
+   asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1);
 
 set_asid:
__set_bit(asid, info->map);
@@ -272,17 +274,18 @@ static int asids_init(void)
struct asid_info *info = &asid_info;
 
info->bits = get_cpu_asid_bits();
+   info->ctxt_shift = ilog2(ASID_PER_CONTEXT);
/*
 * Expect allocation after rollover to fail if we don't have at least
 * one more ASID than CPUs. ASID #0 is reserved for init_mm.
 */
-   WARN_ON(NUM_USER_ASIDS(info) - 1 <= num_possible_cpus());
+   WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
-   info->map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS(info)),
+   info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
sizeof(*info->map), GFP_KERNEL);
if (!info->map)
panic("Failed to allocate bitmap for %lu ASIDs\n",
- NUM_USER_ASIDS(info));
+ NUM_CTXT_ASIDS(info));
 
info->active = &active_asids;
info->reserved = &reserved_asids;
@@ -290,7 +293,7 @@ static int asids_init(void)
raw_spin_lock_init(&info->lock);
 
pr_info("ASID allocator initialised with %lu entries\n",
-   NUM_USER_ASIDS(info));
+

[RFC v2 02/14] arm64/mm: Move active_asids and reserved_asids to asid_info

2019-06-20 Thread Julien Grall
The variables active_asids and reserved_asids hold information for a
given ASID allocator. So move them to the structure asid_info.

At the same time, introduce wrappers to access the active and reserved
ASIDs to make the code clearer.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 34 ++
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 8167c369172d..6bacfc295f6e 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -34,10 +34,16 @@ static struct asid_info
 {
atomic64_t  generation;
unsigned long   *map;
+   atomic64_t __percpu *active;
+   u64 __percpu*reserved;
 } asid_info;
 
+#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
+#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
+
 static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
+
 static cpumask_t tlb_flush_pending;
 
 #define ASID_MASK  (~GENMASK(asid_bits - 1, 0))
@@ -100,7 +106,7 @@ static void flush_context(struct asid_info *info)
bitmap_clear(info->map, 0, NUM_USER_ASIDS);
 
for_each_possible_cpu(i) {
-   asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
+   asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
/*
 * If this CPU has already been through a
 * rollover, but hasn't run another task in
@@ -109,9 +115,9 @@ static void flush_context(struct asid_info *info)
 * the process it is still running.
 */
if (asid == 0)
-   asid = per_cpu(reserved_asids, i);
+   asid = reserved_asid(info, i);
__set_bit(asid2idx(asid), info->map);
-   per_cpu(reserved_asids, i) = asid;
+   reserved_asid(info, i) = asid;
}
 
/*
@@ -121,7 +127,8 @@ static void flush_context(struct asid_info *info)
cpumask_setall(&tlb_flush_pending);
 }
 
-static bool check_update_reserved_asid(u64 asid, u64 newasid)
+static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
+  u64 newasid)
 {
int cpu;
bool hit = false;
@@ -136,9 +143,9 @@ static bool check_update_reserved_asid(u64 asid, u64 
newasid)
 * generation.
 */
for_each_possible_cpu(cpu) {
-   if (per_cpu(reserved_asids, cpu) == asid) {
+   if (reserved_asid(info, cpu) == asid) {
hit = true;
-   per_cpu(reserved_asids, cpu) = newasid;
+   reserved_asid(info, cpu) = newasid;
}
}
 
@@ -158,7 +165,7 @@ static u64 new_context(struct asid_info *info, struct 
mm_struct *mm)
 * If our current ASID was active during a rollover, we
 * can continue to use it and this was just a false alarm.
 */
-   if (check_update_reserved_asid(asid, newasid))
+   if (check_update_reserved_asid(info, asid, newasid))
return newasid;
 
/*
@@ -207,8 +214,8 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
 
/*
 * The memory ordering here is subtle.
-* If our active_asids is non-zero and the ASID matches the current
-* generation, then we update the active_asids entry with a relaxed
+* If our active_asid is non-zero and the ASID matches the current
+* generation, then we update the active_asid entry with a relaxed
 * cmpxchg. Racing with a concurrent rollover means that either:
 *
 * - We get a zero back from the cmpxchg and end up waiting on the
@@ -219,10 +226,10 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
 *   relaxed xchg in flush_context will treat us as reserved
 *   because atomic RmWs are totally ordered for a given location.
 */
-   old_active_asid = atomic64_read(&per_cpu(active_asids, cpu));
+   old_active_asid = atomic64_read(&active_asid(info, cpu));
if (old_active_asid &&
!((asid ^ atomic64_read(&info->generation)) >> asid_bits) &&
-   atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu),
+   atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
 old_active_asid, asid))
goto switch_mm_fastpath;
 
@@ -237,7 +244,7 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
local_flush_tlb_all();
 
-   atomic64_set(&per_cpu(active_asids, cpu), asid);
+   atomic64_set(&active_asid(info, cpu), a

[RFC v2 09/14] arm64/mm: Split the function check_and_switch_context in 3 parts

2019-06-20 Thread Julien Grall
The function check_and_switch_context is used to:
1) Check whether the ASID is still valid
2) Generate a new one if it is not valid
3) Switch the context

While the latter is specific to the MM subsystem, the rest could be part
of the generic ASID allocator.

After this patch, the function is split into 3 parts, which correspond
to the following functions:
1) asid_check_context: Check if the ASID is still valid
2) asid_new_context: Generate a new ASID for the context
3) check_and_switch_context: Call 1) and 2) and switch the context

1) and 2) have not been merged into a single function because we want to
avoid adding a branch when the ASID is still valid. This will matter
when the code is moved into a separate file later on, as 1) will reside
in the header as a static inline function.

Signed-off-by: Julien Grall 

---

Will wants to avoid adding a branch when the ASID is still valid, so
1) and 2) are in separate functions. The former will move to a new
header and become static inline.
---
 arch/arm64/mm/context.c | 51 +
 1 file changed, 39 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 81bc3d365436..fbef5a5c5624 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -204,16 +204,21 @@ static u64 new_context(struct asid_info *info, atomic64_t 
*pasid)
return idx2asid(info, asid) | generation;
 }
 
-void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
+static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
+unsigned int cpu);
+
+/*
+ * Check the ASID is still valid for the context. If not generate a new ASID.
+ *
+ * @pasid: Pointer to the current ASID batch
+ * @cpu: current CPU ID. Must have been acquired throught get_cpu()
+ */
+static void asid_check_context(struct asid_info *info,
+  atomic64_t *pasid, unsigned int cpu)
 {
-   unsigned long flags;
u64 asid, old_active_asid;
-   struct asid_info *info = &asid_info;
 
-   if (system_supports_cnp())
-   cpu_set_reserved_ttbr0();
-
-   asid = atomic64_read(&mm->context.id);
+   asid = atomic64_read(pasid);
 
/*
 * The memory ordering here is subtle.
@@ -234,14 +239,30 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
!((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
 old_active_asid, asid))
-   goto switch_mm_fastpath;
+   return;
+
+   asid_new_context(info, pasid, cpu);
+}
+
+/*
+ * Generate a new ASID for the context.
+ *
+ * @pasid: Pointer to the current ASID batch allocated. It will be updated
+ * with the new ASID batch.
+ * @cpu: current CPU ID. Must have been acquired through get_cpu()
+ */
+static void asid_new_context(struct asid_info *info, atomic64_t *pasid,
+unsigned int cpu)
+{
+   unsigned long flags;
+   u64 asid;
 
raw_spin_lock_irqsave(&info->lock, flags);
/* Check that our ASID belongs to the current generation. */
-   asid = atomic64_read(&mm->context.id);
+   asid = atomic64_read(pasid);
if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
-   asid = new_context(info, &mm->context.id);
-   atomic64_set(&mm->context.id, asid);
+   asid = new_context(info, pasid);
+   atomic64_set(pasid, asid);
}
 
if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
@@ -249,8 +270,14 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
 
atomic64_set(&active_asid(info, cpu), asid);
raw_spin_unlock_irqrestore(&info->lock, flags);
+}
+
+void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
+{
+   if (system_supports_cnp())
+   cpu_set_reserved_ttbr0();
 
-switch_mm_fastpath:
+   asid_check_context(&asid_info, &mm->context.id, cpu);
 
arm64_apply_bp_hardening();
 
-- 
2.11.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC v2 05/14] arm64/mm: Remove dependency on MM in new_context

2019-06-20 Thread Julien Grall
The function new_context will be part of a generic ASID allocator. At
the moment, the MM structure is only used to fetch the ASID.

To remove the dependency on MM, it is possible to just pass a pointer to
the current ASID.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 6457a9310fe4..a9cc59288b08 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -151,10 +151,10 @@ static bool check_update_reserved_asid(struct asid_info 
*info, u64 asid,
return hit;
 }
 
-static u64 new_context(struct asid_info *info, struct mm_struct *mm)
+static u64 new_context(struct asid_info *info, atomic64_t *pasid)
 {
static u32 cur_idx = 1;
-   u64 asid = atomic64_read(&mm->context.id);
+   u64 asid = atomic64_read(pasid);
u64 generation = atomic64_read(&info->generation);
 
if (asid != 0) {
@@ -236,7 +236,7 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
/* Check that our ASID belongs to the current generation. */
asid = atomic64_read(&mm->context.id);
if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
-   asid = new_context(info, mm);
+   asid = new_context(info, &mm->context.id);
atomic64_set(&mm->context.id, asid);
}
 
-- 
2.11.0



[RFC v2 11/14] arm64: Move the ASID allocator code in a separate file

2019-06-20 Thread Julien Grall
We will want to re-use the ASID allocator in a separate context (e.g
allocating VMID). So move the code in a new file.

The function asid_check_context has been moved into the header as a static
inline function because we want to avoid adding a branch when checking if the
ASID is still valid.

Signed-off-by: Julien Grall 

---

This code will be used in the virt code for allocating VMIDs. I am not
entirely sure where to place it. Lib could potentially be a good place, but I
am not entirely convinced the algorithm as it is could be used by other
architectures.

Looking at x86, it seems that it will not be possible to re-use the code
because the number of PCIDs (aka ASIDs) could be smaller than the number of
CPUs. See commit 10af6235e0d327d42e1bad974385197817923dc1 "x86/mm:
Implement PCID based optimization: try to preserve old TLB entries using
PCID".

Changes in v2:
- Rename the header from asid.h to lib_asid.h
---
 arch/arm64/include/asm/lib_asid.h |  77 +
 arch/arm64/lib/Makefile   |   2 +
 arch/arm64/lib/asid.c | 185 ++
 arch/arm64/mm/context.c   | 235 +-
 4 files changed, 267 insertions(+), 232 deletions(-)
 create mode 100644 arch/arm64/include/asm/lib_asid.h
 create mode 100644 arch/arm64/lib/asid.c

diff --git a/arch/arm64/include/asm/lib_asid.h 
b/arch/arm64/include/asm/lib_asid.h
new file mode 100644
index ..c18e9eca500e
--- /dev/null
+++ b/arch/arm64/include/asm/lib_asid.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ASM_LIB_ASID_H
+#define __ASM_ASM_LIB_ASID_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct asid_info
+{
+   atomic64_t  generation;
+   unsigned long   *map;
+   atomic64_t __percpu *active;
+   u64 __percpu*reserved;
+   u32 bits;
+   /* Lock protecting the structure */
+   raw_spinlock_t  lock;
+   /* Which CPU requires context flush on next call */
+   cpumask_t   flush_pending;
+   /* Number of ASID allocated by context (shift value) */
+   unsigned intctxt_shift;
+   /* Callback to locally flush the context. */
+   void(*flush_cpu_ctxt_cb)(void);
+};
+
+#define NUM_ASIDS(info)(1UL << ((info)->bits))
+#define NUM_CTXT_ASIDS(info)   (NUM_ASIDS(info) >> (info)->ctxt_shift)
+
+#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
+
+void asid_new_context(struct asid_info *info, atomic64_t *pasid,
+ unsigned int cpu);
+
+/*
+ * Check the ASID is still valid for the context. If not generate a new ASID.
+ *
+ * @pasid: Pointer to the current ASID batch
+ * @cpu: current CPU ID. Must have been acquired throught get_cpu()
+ */
+static inline void asid_check_context(struct asid_info *info,
+ atomic64_t *pasid, unsigned int cpu)
+{
+   u64 asid, old_active_asid;
+
+   asid = atomic64_read(pasid);
+
+   /*
+* The memory ordering here is subtle.
+* If our active_asid is non-zero and the ASID matches the current
+* generation, then we update the active_asid entry with a relaxed
+* cmpxchg. Racing with a concurrent rollover means that either:
+*
+* - We get a zero back from the cmpxchg and end up waiting on the
+*   lock. Taking the lock synchronises with the rollover and so
+*   we are forced to see the updated generation.
+*
+* - We get a valid ASID back from the cmpxchg, which means the
+*   relaxed xchg in flush_context will treat us as reserved
+*   because atomic RmWs are totally ordered for a given location.
+*/
+   old_active_asid = atomic64_read(&active_asid(info, cpu));
+   if (old_active_asid &&
+   !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
+   atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
+old_active_asid, asid))
+   return;
+
+   asid_new_context(info, pasid, cpu);
+}
+
+int asid_allocator_init(struct asid_info *info,
+   u32 bits, unsigned int asid_per_ctxt,
+   void (*flush_cpu_ctxt_cb)(void));
+
+#endif
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 33c2a4abda04..37169d541ab5 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -5,6 +5,8 @@ lib-y   := clear_user.o delay.o copy_from_user.o
\
   memcmp.o strcmp.o strncmp.o strlen.o strnlen.o   \
   strchr.o strrchr.o tishift.o
 
+lib-y  += asid.o
+
 ifeq ($(CONFIG_KERNEL_MODE_NEON), y)
 obj-$(CONFIG_XOR_BLOCKS)   += xor-neon.o
 CFLAGS_REMOVE_xor-neon.o   += -mgeneral-regs-only
diff --git a/a

[RFC v2 14/14] kvm/arm: Align the VMID allocation with the arm64 ASID one

2019-06-20 Thread Julien Grall
At the moment, the VMID algorithm will send an SGI to all the CPUs to
force an exit and then broadcast a full TLB flush and I-Cache
invalidation.

This patch re-uses the new ASID allocator. The
benefits are:
- CPUs are not forced to exit at roll-over. Instead the VMID will be
marked reserved and the context will be flushed at next exit. This
will reduce the IPI traffic.
- Context invalidation is now per-CPU rather than broadcasted.

With the new algorithm, the code is adapted as follows:
- The function __kvm_flush_vm_context() has been renamed to
__kvm_flush_cpu_vmid_context() and now only flushes the current CPU context.
- The call to update_vttbr() will be done with preemption disabled,
as the new algorithm requires storing information per-CPU.
- The TLBs associated with EL1 will be flushed when booting a CPU, to
deal with stale information. This was previously done on the
allocation of the first VMID of a new generation.
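
A rough sketch of the resulting flow (vmid_info and the function shape are
approximations of this series, not the exact patch):

static struct asid_info vmid_info;      /* allocator instance used for VMIDs */

/* Revalidate the guest's VMID before entering the guest. Preemption is
 * disabled because the allocator keeps per-CPU state. */
static void update_vmid_sketch(struct kvm_vmid *vmid)
{
        unsigned int cpu = get_cpu();

        asid_check_context(&vmid_info, &vmid->asid, cpu, vmid);

        put_cpu();
}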

The measurement was made on a Seattle-based SoC (8 CPUs), with the
number of VMIDs limited to 4 bits. The test involves running 40 guests
with 2 vCPUs concurrently. Each guest executes hackbench 5 times
before exiting.

The performance differences between the current algorithm and the new one are:
- 2.5% less exit from the guest
- 22.4% more flush, although they are now local rather than
broadcasted
- 0.11% faster (just for the record)

Signed-off-by: Julien Grall 


Looking at __kvm_flush_vm_context(), it might be possible to
reduce the overhead further by removing the I-Cache flush for caches
other than VIPT. This has been left aside for now.
---
 arch/arm/include/asm/kvm_asm.h|   2 +-
 arch/arm/include/asm/kvm_host.h   |   5 +-
 arch/arm/include/asm/kvm_hyp.h|   1 +
 arch/arm/kvm/hyp/tlb.c|   8 +--
 arch/arm64/include/asm/kvm_asid.h |   8 +++
 arch/arm64/include/asm/kvm_asm.h  |   2 +-
 arch/arm64/include/asm/kvm_host.h |   5 +-
 arch/arm64/kvm/hyp/tlb.c  |  10 ++--
 virt/kvm/arm/arm.c| 112 +-
 9 files changed, 61 insertions(+), 92 deletions(-)
 create mode 100644 arch/arm64/include/asm/kvm_asid.h

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index f615830f9f57..c2a2e6ef1e2f 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -53,7 +53,7 @@ struct kvm_vcpu;
 extern char __kvm_hyp_init[];
 extern char __kvm_hyp_init_end[];
 
-extern void __kvm_flush_vm_context(void);
+extern void __kvm_flush_cpu_vmid_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
 extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index f80418ddeb60..7b894ff16688 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -50,8 +50,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
 
 struct kvm_vmid {
-   /* The VMID generation used for the virt. memory system */
-   u64vmid_gen;
+   /* The ASID used for the ASID allocator */
+   atomic64_t asid;
u32vmid;
 };
 
@@ -259,7 +259,6 @@ unsigned long __kvm_call_hyp(void *hypfn, ...);
ret;\
})
 
-void force_vm_exit(const cpumask_t *mask);
 int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
  struct kvm_vcpu_events *events);
 
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index 87bcd18df8d5..c3d1011ca1bf 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -75,6 +75,7 @@
 #define TLBIALLIS  __ACCESS_CP15(c8, 0, c3, 0)
 #define TLBIALL__ACCESS_CP15(c8, 0, c7, 0)
 #define TLBIALLNSNHIS  __ACCESS_CP15(c8, 4, c3, 4)
+#define TLBIALLNSNH__ACCESS_CP15(c8, 4, c7, 4)
 #define PRRR   __ACCESS_CP15(c10, 0, c2, 0)
 #define NMRR   __ACCESS_CP15(c10, 0, c2, 1)
 #define AMAIR0 __ACCESS_CP15(c10, 0, c3, 0)
diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c
index 8e4afba73635..42b9ab47fc94 100644
--- a/arch/arm/kvm/hyp/tlb.c
+++ b/arch/arm/kvm/hyp/tlb.c
@@ -71,9 +71,9 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu 
*vcpu)
write_sysreg(0, VTTBR);
 }
 
-void __hyp_text __kvm_flush_vm_context(void)
+void __hyp_text __kvm_flush_cpu_vmid_context(void)
 {
-   write_sysreg(0, TLBIALLNSNHIS);
-   write_sysreg(0, ICIALLUIS);
-   dsb(ish);
+   write_sysreg(0, TLBIALLNSNH);
+   write_sysreg(0, ICIALLU);
+   dsb(nsh);
 }
diff --git a/arch/arm64/include/asm/kvm_asid.h 
b/arch/arm64/include/asm/kvm_asid.h
new file mode 100644
index ..8b586e43c094
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_asid.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL

[RFC v2 08/14] arm64/mm: Split asid_inits in 2 parts

2019-06-20 Thread Julien Grall
Move the common initialization of the ASID allocator out into a separate
function.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 43 +++
 1 file changed, 31 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index beba8e5b4100..81bc3d365436 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -271,31 +271,50 @@ asmlinkage void post_ttbr_update_workaround(void)
CONFIG_CAVIUM_ERRATUM_27456));
 }
 
-static int asids_init(void)
+/*
+ * Initialize the ASID allocator
+ *
+ * @info: Pointer to the asid allocator structure
+ * @bits: Number of ASIDs available
+ * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are
+ * allocated contiguously for a given context. This value should be a power of
+ * 2.
+ */
+static int asid_allocator_init(struct asid_info *info,
+  u32 bits, unsigned int asid_per_ctxt)
 {
-   struct asid_info *info = &asid_info;
-
-   info->bits = get_cpu_asid_bits();
-   info->ctxt_shift = ilog2(ASID_PER_CONTEXT);
+   info->bits = bits;
+   info->ctxt_shift = ilog2(asid_per_ctxt);
/*
 * Expect allocation after rollover to fail if we don't have at least
-* one more ASID than CPUs. ASID #0 is reserved for init_mm.
+* one more ASID than CPUs. ASID #0 is always reserved.
 */
WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus());
atomic64_set(&info->generation, ASID_FIRST_VERSION(info));
info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)),
sizeof(*info->map), GFP_KERNEL);
if (!info->map)
-   panic("Failed to allocate bitmap for %lu ASIDs\n",
- NUM_CTXT_ASIDS(info));
-
-   info->active = &active_asids;
-   info->reserved = &reserved_asids;
+   return -ENOMEM;
 
raw_spin_lock_init(&info->lock);
 
+   return 0;
+}
+
+static int asids_init(void)
+{
+   u32 bits = get_cpu_asid_bits();
+
+   if (!asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT))
+   panic("Unable to initialize ASID allocator for %lu ASIDs\n",
+ 1UL << bits);
+
+   asid_info.active = &active_asids;
+   asid_info.reserved = &reserved_asids;
+
pr_info("ASID allocator initialised with %lu entries\n",
-   NUM_CTXT_ASIDS(info));
+   NUM_CTXT_ASIDS(&asid_info));
+
return 0;
 }
 early_initcall(asids_init);
-- 
2.11.0



[RFC v2 10/14] arm64/mm: Introduce a callback to flush the local context

2019-06-20 Thread Julien Grall
Flushing the local context will vary depending on the actual user of the ASID
allocator. Introduce a new callback to flush the local context and move
the call to flush the local TLB into it.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index fbef5a5c5624..3df63a28856c 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -39,6 +39,8 @@ static struct asid_info
cpumask_t   flush_pending;
/* Number of ASID allocated by context (shift value) */
unsigned intctxt_shift;
+   /* Callback to locally flush the context. */
+   void(*flush_cpu_ctxt_cb)(void);
 } asid_info;
 
 #define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
@@ -266,7 +268,7 @@ static void asid_new_context(struct asid_info *info, 
atomic64_t *pasid,
}
 
if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
-   local_flush_tlb_all();
+   info->flush_cpu_ctxt_cb();
 
atomic64_set(&active_asid(info, cpu), asid);
raw_spin_unlock_irqrestore(&info->lock, flags);
@@ -298,6 +300,11 @@ asmlinkage void post_ttbr_update_workaround(void)
CONFIG_CAVIUM_ERRATUM_27456));
 }
 
+static void asid_flush_cpu_ctxt(void)
+{
+   local_flush_tlb_all();
+}
+
 /*
  * Initialize the ASID allocator
  *
@@ -308,10 +315,12 @@ asmlinkage void post_ttbr_update_workaround(void)
  * 2.
  */
 static int asid_allocator_init(struct asid_info *info,
-  u32 bits, unsigned int asid_per_ctxt)
+  u32 bits, unsigned int asid_per_ctxt,
+  void (*flush_cpu_ctxt_cb)(void))
 {
info->bits = bits;
info->ctxt_shift = ilog2(asid_per_ctxt);
+   info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
/*
 * Expect allocation after rollover to fail if we don't have at least
 * one more ASID than CPUs. ASID #0 is always reserved.
@@ -332,7 +341,8 @@ static int asids_init(void)
 {
u32 bits = get_cpu_asid_bits();
 
-   if (!asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT))
+   if (!asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT,
+asid_flush_cpu_ctxt))
panic("Unable to initialize ASID allocator for %lu ASIDs\n",
  1UL << bits);
 
-- 
2.11.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC v2 12/14] arm64/lib: asid: Allow user to update the context under the lock

2019-06-20 Thread Julien Grall
Some users of the ASID allocator (e.g. VMID) will require updating the
context when a new ASID is generated. This has to be protected by a lock
to prevent concurrent modification.

Rather than introducing yet another lock, it is possible to re-use the
allocator lock for that purpose. This patch introduces a new callback
that will be called when updating the context.
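
A minimal sketch of a user registering both callbacks (hypothetical "my_*"
names; the intended user is the VMID allocator later in the series):

static struct asid_info my_info;

static void my_flush_cpu_ctxt(void)
{
        local_flush_tlb_all();
}

static void my_update_ctxt(void *ctxt)
{
        /* Update per-context state under the allocator lock. */
}

static int __init my_init(void)
{
        return asid_allocator_init(&my_info, 16, 1,
                                   my_flush_cpu_ctxt, my_update_ctxt);
}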

Signed-off-by: Julien Grall 
---
 arch/arm64/include/asm/lib_asid.h | 12 
 arch/arm64/lib/asid.c | 10 --
 arch/arm64/mm/context.c   | 11 ---
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/lib_asid.h 
b/arch/arm64/include/asm/lib_asid.h
index c18e9eca500e..810f0b05a8da 100644
--- a/arch/arm64/include/asm/lib_asid.h
+++ b/arch/arm64/include/asm/lib_asid.h
@@ -23,6 +23,8 @@ struct asid_info
unsigned intctxt_shift;
/* Callback to locally flush the context. */
void(*flush_cpu_ctxt_cb)(void);
+   /* Callback to call when a context is updated */
+   void(*update_ctxt_cb)(void *ctxt);
 };
 
 #define NUM_ASIDS(info)(1UL << ((info)->bits))
@@ -31,7 +33,7 @@ struct asid_info
 #define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
 
 void asid_new_context(struct asid_info *info, atomic64_t *pasid,
- unsigned int cpu);
+ unsigned int cpu, void *ctxt);
 
 /*
  * Check the ASID is still valid for the context. If not generate a new ASID.
@@ -40,7 +42,8 @@ void asid_new_context(struct asid_info *info, atomic64_t 
*pasid,
  * @cpu: current CPU ID. Must have been acquired through get_cpu()
  */
 static inline void asid_check_context(struct asid_info *info,
- atomic64_t *pasid, unsigned int cpu)
+  atomic64_t *pasid, unsigned int cpu,
+  void *ctxt)
 {
u64 asid, old_active_asid;
 
@@ -67,11 +70,12 @@ static inline void asid_check_context(struct asid_info 
*info,
 old_active_asid, asid))
return;
 
-   asid_new_context(info, pasid, cpu);
+   asid_new_context(info, pasid, cpu, ctxt);
 }
 
 int asid_allocator_init(struct asid_info *info,
u32 bits, unsigned int asid_per_ctxt,
-   void (*flush_cpu_ctxt_cb)(void));
+   void (*flush_cpu_ctxt_cb)(void),
+   void (*update_ctxt_cb)(void *ctxt));
 
 #endif
diff --git a/arch/arm64/lib/asid.c b/arch/arm64/lib/asid.c
index 7252e4fdd5e9..dd2c6e4c1ff0 100644
--- a/arch/arm64/lib/asid.c
+++ b/arch/arm64/lib/asid.c
@@ -130,9 +130,10 @@ static u64 new_context(struct asid_info *info, atomic64_t 
*pasid)
  * @pasid: Pointer to the current ASID batch allocated. It will be updated
  * with the new ASID batch.
  * @cpu: current CPU ID. Must have been acquired through get_cpu()
+ * @ctxt: Context to update when calling update_context
  */
 void asid_new_context(struct asid_info *info, atomic64_t *pasid,
- unsigned int cpu)
+ unsigned int cpu, void *ctxt)
 {
unsigned long flags;
u64 asid;
@@ -149,6 +150,9 @@ void asid_new_context(struct asid_info *info, atomic64_t 
*pasid,
info->flush_cpu_ctxt_cb();
 
atomic64_set(&active_asid(info, cpu), asid);
+
+   info->update_ctxt_cb(ctxt);
+
raw_spin_unlock_irqrestore(&info->lock, flags);
 }
 
@@ -163,11 +167,13 @@ void asid_new_context(struct asid_info *info, atomic64_t 
*pasid,
  */
 int asid_allocator_init(struct asid_info *info,
u32 bits, unsigned int asid_per_ctxt,
-   void (*flush_cpu_ctxt_cb)(void))
+   void (*flush_cpu_ctxt_cb)(void),
+   void (*update_ctxt_cb)(void *ctxt))
 {
info->bits = bits;
info->ctxt_shift = ilog2(asid_per_ctxt);
info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb;
+   info->update_ctxt_cb = update_ctxt_cb;
/*
 * Expect allocation after rollover to fail if we don't have at least
 * one more ASID than CPUs. ASID #0 is always reserved.
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index b745cf356fe1..527ea82983d7 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -82,7 +82,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned 
int cpu)
if (system_supports_cnp())
cpu_set_reserved_ttbr0();
 
-   asid_check_context(&asid_info, &mm->context.id, cpu);
+   asid_check_context(&asid_info, &mm->context.id, cpu, mm);
 
arm64_apply_bp_hardening();
 
@@ -108,12 +108,17 @@ static void asid_flush_cpu_ctxt(void)
local_flush_tlb_all();
 }
 
+static void asid_update_ctxt(void *ctxt)
+{
+   /* No
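
To make the locking rule of this patch concrete, here is a small user-space
sketch in which a pthread mutex stands in for the allocator's raw spinlock and
all names (toy_allocator, toy_vmid, ...) are illustrative: the user-supplied
update callback runs while the allocator lock is held, so the cached value it
derives cannot race with a concurrent rollover.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_ID_BITS     4

struct toy_vmid {
        uint64_t id;    /* generation | vmid, as handed out by the allocator */
        uint32_t vmid;  /* cached copy with the generation bits masked out */
};

struct toy_allocator {
        pthread_mutex_t lock;   /* stand-in for the allocator's raw spinlock */
        void (*update_ctxt_cb)(void *ctxt);
};

/* User callback: derive the cached field from the freshly allocated id. */
static void toy_update_ctxt(void *ctxt)
{
        struct toy_vmid *v = ctxt;

        v->vmid = v->id & ((1ULL << TOY_ID_BITS) - 1);
}

/* Equivalent of asid_new_context(): the callback is invoked under the lock. */
static void toy_new_context(struct toy_allocator *a, struct toy_vmid *v,
                            uint64_t new_id)
{
        pthread_mutex_lock(&a->lock);
        v->id = new_id;
        a->update_ctxt_cb(v);
        pthread_mutex_unlock(&a->lock);
}

int main(void)
{
        struct toy_allocator a = { PTHREAD_MUTEX_INITIALIZER, toy_update_ctxt };
        struct toy_vmid v = { 0, 0 };

        toy_new_context(&a, &v, (2ULL << TOY_ID_BITS) | 0x3);
        printf("id=%#llx cached vmid=%u\n", (unsigned long long)v.id, v.vmid);
        return 0;
}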

[RFC v2 13/14] arm/kvm: Introduce a new VMID allocator

2019-06-20 Thread Julien Grall
A follow-up patch will replace the KVM VMID allocator with the arm64 ASID
allocator.

To avoid duplication as much as possible, the arm KVM code will directly
compile arch/arm64/lib/asid.c. The header is a verbatim copy, to
avoid breaking the assumption that an architecture port has self-contained
headers.

Signed-off-by: Julien Grall 
Cc: Russell King 

---
I hit a warning when compiling the ASID code:

linux/arch/arm/kvm/../../arm64/lib/asid.c:17: warning: "ASID_MASK" redefined
 #define ASID_MASK(info)   (~GENMASK((info)->bits - 1, 0))

In file included from linux/include/linux/mm_types.h:18,
 from linux/include/linux/mmzone.h:21,
 from linux/include/linux/gfp.h:6,
 from linux/include/linux/slab.h:15,
 from linux/arch/arm/kvm/../../arm64/lib/asid.c:11:
linux/arch/arm/include/asm/mmu.h:26: note: this is the location of the previous 
definition
 #define ASID_MASK ((~0ULL) << ASID_BITS)

I haven't resolved it yet because I am not sure of the best way to go.
AFAICT ASID_MASK is only used in mm/context.c, so I am wondering whether
it would be acceptable to move the define.

Changes in v2:
- Re-use arm64/lib/asid.c rather than duplicating the code.
---
 arch/arm/include/asm/lib_asid.h | 81 +
 arch/arm/kvm/Makefile   |  1 +
 2 files changed, 82 insertions(+)
 create mode 100644 arch/arm/include/asm/lib_asid.h

diff --git a/arch/arm/include/asm/lib_asid.h b/arch/arm/include/asm/lib_asid.h
new file mode 100644
index ..79bce4686d21
--- /dev/null
+++ b/arch/arm/include/asm/lib_asid.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARM_LIB_ASID_H__
+#define __ARM_LIB_ASID_H__
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct asid_info
+{
+   atomic64_t  generation;
+   unsigned long   *map;
+   atomic64_t __percpu *active;
+   u64 __percpu*reserved;
+   u32 bits;
+   /* Lock protecting the structure */
+   raw_spinlock_t  lock;
+   /* Which CPU requires context flush on next call */
+   cpumask_t   flush_pending;
+   /* Number of ASID allocated by context (shift value) */
+   unsigned intctxt_shift;
+   /* Callback to locally flush the context. */
+   void(*flush_cpu_ctxt_cb)(void);
+   /* Callback to call when a context is updated */
+   void(*update_ctxt_cb)(void *ctxt);
+};
+
+#define NUM_ASIDS(info)(1UL << ((info)->bits))
+#define NUM_CTXT_ASIDS(info)   (NUM_ASIDS(info) >> (info)->ctxt_shift)
+
+#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
+
+void asid_new_context(struct asid_info *info, atomic64_t *pasid,
+ unsigned int cpu, void *ctxt);
+
+/*
+ * Check the ASID is still valid for the context. If not generate a new ASID.
+ *
+ * @pasid: Pointer to the current ASID batch
+ * @cpu: current CPU ID. Must have been acquired through get_cpu()
+ */
+static inline void asid_check_context(struct asid_info *info,
+  atomic64_t *pasid, unsigned int cpu,
+  void *ctxt)
+{
+   u64 asid, old_active_asid;
+
+   asid = atomic64_read(pasid);
+
+   /*
+* The memory ordering here is subtle.
+* If our active_asid is non-zero and the ASID matches the current
+* generation, then we update the active_asid entry with a relaxed
+* cmpxchg. Racing with a concurrent rollover means that either:
+*
+* - We get a zero back from the cmpxchg and end up waiting on the
+*   lock. Taking the lock synchronises with the rollover and so
+*   we are forced to see the updated generation.
+*
+* - We get a valid ASID back from the cmpxchg, which means the
+*   relaxed xchg in flush_context will treat us as reserved
+*   because atomic RmWs are totally ordered for a given location.
+*/
+   old_active_asid = atomic64_read(&active_asid(info, cpu));
+   if (old_active_asid &&
+   !((asid ^ atomic64_read(&info->generation)) >> info->bits) &&
+   atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
+old_active_asid, asid))
+   return;
+
+   asid_new_context(info, pasid, cpu, ctxt);
+}
+
+int asid_allocator_init(struct asid_info *info,
+   u32 bits, unsigned int asid_per_ctxt,
+   void (*flush_cpu_ctxt_cb)(void),
+   void (*update_ctxt_cb)(void *ctxt));
+
+#endif /* __ARM_LIB_ASID_H__ */
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 531e59f5be9c..6ab49bd84531 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -40,
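
The ASID_MASK clash reported above can be pictured in a few lines. The snippet
below is only a stand-alone illustration of the name collision and of one
possible workaround (scoping the arm define to its only user, or #undef-ing it
before including the allocator header); it is not the fix chosen in this
series, which is still an open question.

#include <stdio.h>

/* Stand-in for the object-like macro from arch/arm/include/asm/mmu.h. */
#define ASID_BITS       8
#define ASID_MASK       ((~0ULL) << ASID_BITS)

/*
 * Pulling in the generic allocator, which defines a function-like
 * ASID_MASK(info), would now trigger "ASID_MASK redefined".  Moving the
 * arm definition next to its only user (mm/context.c), or #undef-ing it
 * before including the allocator header, avoids the clash:
 */
#undef ASID_MASK
#define ASID_MASK(bits) (~((1ULL << (bits)) - 1))

int main(void)
{
        printf("mask for %d bits: %#llx\n", ASID_BITS,
               (unsigned long long)ASID_MASK(ASID_BITS));
        return 0;
}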

Re: [RFC v2 11/14] arm64: Move the ASID allocator code in a separate file

2019-07-15 Thread Julien Grall

On 04/07/2019 15:56, James Morse wrote:

Hi Julien,


Hi James,

Thank you for the review.



On 20/06/2019 14:06, Julien Grall wrote:

We will want to re-use the ASID allocator in a separate context (e.g.
allocating VMID). So move the code to a new file.

The function asid_check_context has been moved into the header as a static
inline function because we want to avoid adding a branch when checking if the
ASID is still valid.



diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 3df63a28856c..b745cf356fe1 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -23,46 +23,21 @@



-#define ASID_FIRST_VERSION(info)   NUM_ASIDS(info)



diff --git a/arch/arm64/lib/asid.c b/arch/arm64/lib/asid.c
new file mode 100644
index ..7252e4fdd5e9
--- /dev/null
+++ b/arch/arm64/lib/asid.c
@@ -0,0 +1,185 @@



+#define ASID_FIRST_VERSION(info)   (1UL << ((info)->bits))


(oops!)


Good catch, I will fix it in the next version.





@@ -344,7 +115,7 @@ static int asids_init(void)
if (!asid_allocator_init(&asid_info, bits, ASID_PER_CONTEXT,
 asid_flush_cpu_ctxt))
panic("Unable to initialize ASID allocator for %lu ASIDs\n",
- 1UL << bits);
+ NUM_ASIDS(&asid_info));


Could this go in the patch that adds NUM_ASIDS()?


Actually this change is potentially wrong. This relies on asid_allocator_init() 
to set asid_info.bits even if the function fails.


So I think it would be best to keep 1UL << bits here.

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
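
As a rough, stand-alone picture of the fast-path/slow-path split discussed in
the quoted commit message (a header-inlined check that only falls back to an
out-of-line allocation on a generation mismatch), here is a toy with made-up
names and a plain global in place of the atomic generation counter.

#include <stdint.h>
#include <stdio.h>

#define GEN_SHIFT       8       /* low 8 bits: id, higher bits: generation */

static uint64_t current_generation = 1ULL << GEN_SHIFT;

/* Out-of-line slow path: take the (omitted) lock and allocate a fresh id. */
static uint64_t toy_new_context(uint64_t old)
{
        return current_generation | (old & ((1ULL << GEN_SHIFT) - 1));
}

/*
 * Header-style fast path: a single check, no out-of-line call as long as
 * the generation still matches (mirroring the inlined asid_check_context).
 */
static inline uint64_t toy_check_context(uint64_t id)
{
        if (!((id ^ current_generation) >> GEN_SHIFT))
                return id;              /* still valid: fast path */
        return toy_new_context(id);     /* rollover happened: slow path */
}

int main(void)
{
        uint64_t id = current_generation | 5;

        printf("before rollover: %#llx\n",
               (unsigned long long)toy_check_context(id));
        current_generation += 1ULL << GEN_SHIFT;        /* simulate a rollover */
        printf("after rollover:  %#llx\n",
               (unsigned long long)toy_check_context(id));
        return 0;
}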


Re: [RFC v2 12/14] arm64/lib: asid: Allow user to update the context under the lock

2019-07-15 Thread Julien Grall




On 03/07/2019 18:35, James Morse wrote:

Hi Julien,


Hi James,


On 20/06/2019 14:06, Julien Grall wrote:

Some users of the ASID allocator (e.g. VMID) will need to update the
context when a new ASID is generated. This has to be protected by a lock
to prevent concurrent modification.

Rather than introducing yet another lock, it is possible to re-use the
allocator lock for that purpose. This patch introduces a new callback
that will be called when updating the context.


You're using this later in the series to mask out the generation from the 
atomic64 to
leave just the vmid.


You are right.



Where does this concurrent modification happen? The value is only written if we 
have a
rollover, and while it's active the only bits that could change are the 
generation.
(subsequent vCPUs that take the slow path for the same VM will see the updated 
generation
and skip the new_context call)

If we did the generation filtering in update_vmid() after the call to
asid_check_context(), what would go wrong?
It happens more often than is necessary and would need a WRITE_ONCE(), but the 
vmid can't
change until we become preemptible and another vCPU gets a chance to make its 
vmid active.


I think I was over cautious. Pre-filtering after asid_check_context() is equally 
fine as long as update_vttbr() is called from preemptible context.


Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC v2 14/14] kvm/arm: Align the VMID allocation with the arm64 ASID one

2019-07-15 Thread Julien Grall

On 03/07/2019 18:36, James Morse wrote:

Hi Julien,


Hi James,


On 20/06/2019 14:06, Julien Grall wrote:

At the moment, the VMID algorithm will send an SGI to all the CPUs to
force an exit and then broadcast a full TLB flush and I-Cache
invalidation.

This patch re-uses the new ASID allocator. The
benefits are:
 - CPUs are not forced to exit at roll-over. Instead the VMID will be
 marked reserved and the context will be flushed at next exit. This
 will reduce the IPI traffic.
 - Context invalidation is now per-CPU rather than broadcasted.


+ Catalin has a model of the asid-allocator.


That's a good point :).





With the new algo, the code is now adapted:
 - The function __kvm_flush_vm_context() has been renamed to
 __kvm_flush_cpu_vmid_context and now only flushes the current CPU context.
 - The call to update_vttbr() will be done with preemption disabled
 as the new algo requires storing information per CPU.
 - The TLBs associated with EL1 will be flushed when booting a CPU to
 deal with stale information. This was previously done on the
 allocation of the first VMID of a new generation.

The measurement was made on a Seattle-based SoC (8 CPUs), with the
number of VMIDs limited to 4 bits. The test involves running 40 guests
with 2 vCPUs concurrently. Each guest will then execute hackbench 5 times
before exiting.



diff --git a/arch/arm64/include/asm/kvm_asid.h 
b/arch/arm64/include/asm/kvm_asid.h
new file mode 100644
index ..8b586e43c094
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_asid.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARM64_KVM_ASID_H__
+#define __ARM64_KVM_ASID_H__
+
+#include 
+
+#endif /* __ARM64_KVM_ASID_H__ */
+
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index ff73f5462aca..06821f548c0f 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -62,7 +62,7 @@ extern char __kvm_hyp_init_end[];
  
  extern char __kvm_hyp_vector[];
  
-extern void __kvm_flush_vm_context(void);

+extern void __kvm_flush_cpu_vmid_context(void);
  extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);


As we've got a __kvm_tlb_flush_local_vmid(), would __kvm_tlb_flush_local_all() 
fit in
better? (This mirrors local_flush_tlb_all() too)


I am happy with the renaming here.





  extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
  extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 4bcd9c1291d5..7ef45b7da4eb 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -68,8 +68,8 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long 
ext);
  void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t 
idmap_start);
  
  struct kvm_vmid {

-   /* The VMID generation used for the virt. memory system */
-   u64vmid_gen;
+   /* The ASID used for the ASID allocator */
+   atomic64_t asid;


Can we call this 'id' as happens in mm_context_t? (calling it asid is confusing)


I am fine with this suggestion.




u32vmid;


Can we filter out the generation bits in kvm_get_vttbr() in the same way the 
arch code
does in cpu_do_switch_mm().

I think this saves writing back a cached pre-filtered version every time, or 
needing
special hooks to know when the value changed. (so we can remove this variable)


[...]


+static void vmid_update_ctxt(void *ctxt)
  {
+   struct kvm_vmid *vmid = ctxt;
+   u64 asid = atomic64_read(&vmid->asid);



+   vmid->vmid = asid & ((1ULL << kvm_get_vmid_bits()) - 1);


I don't like having to poke this through the asid-allocator as a kvm-specific 
hack. Can we
do it in kvm_get_vttbr()?


I will have a look.





  }



@@ -487,48 +467,11 @@ static bool need_new_vmid_gen(struct kvm_vmid *vmid)


(git made a mess of the diff here... squashed to just the new code:)


  static void update_vmid(struct kvm_vmid *vmid)
  {



+   int cpu = get_cpu();
  
+	asid_check_context(&vmid_info, &vmid->asid, cpu, vmid);
  
+	put_cpu();


If we're calling update_vmid() in a pre-emptible context, aren't we already 
doomed?


Yes we are. This made me realize that Linux-RT replaced the preempt_disable() in 
the caller by migrate_disable(). The latter will prevent the task from moving to 
another CPU but allow preemption.


This patch will likely make things awfully broken for Linux-RT. I will have a 
look to see if we can call this from the preempt notifier.




Could we use smp_processor_id() instead.



  }




@@ -1322,6 +1271,8 @@ static void cpu_init_hyp_mode(void *dummy)
  
  	__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);

__cpu_init_stage2();




+   kvm_call_hyp(__kvm_flush_cpu_vmid_context);


I think we only need to do this for VHE systems too. cpu_hyp_reinit() on
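
As a rough sketch of the alternative suggested in this review -- masking the
generation bits out at the point where the VTTBR is assembled, instead of
caching a pre-filtered vmid via the update callback -- the stand-alone toy
below uses made-up bit positions and names (TOY_VMID_BITS, toy_get_vttbr); it
is not the real kvm_get_vttbr().

#include <stdint.h>
#include <stdio.h>

#define TOY_VMID_BITS           4
#define TOY_VTTBR_VMID_SHIFT    48

/*
 * Build a VTTBR-like value from the allocator's id (generation | vmid),
 * dropping the generation bits on the fly instead of caching vmid->vmid.
 */
static uint64_t toy_get_vttbr(uint64_t id, uint64_t baddr)
{
        uint64_t vmid = id & ((1ULL << TOY_VMID_BITS) - 1);

        return baddr | (vmid << TOY_VTTBR_VMID_SHIFT);
}

int main(void)
{
        uint64_t id = (7ULL << TOY_VMID_BITS) | 0x5;    /* generation 7, vmid 5 */

        printf("vttbr = %#llx\n",
               (unsigned long long)toy_get_vttbr(id, 0x40000000ULL));
        return 0;
}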

KVM Arm64 and Linux-RT issues

2019-07-23 Thread Julien Grall
 handle_exit+0x134/0x1d0
[  122.565031] 000:  kvm_arch_vcpu_ioctl_run+0x658/0xbc0
[  122.565032] 000:  kvm_vcpu_ioctl+0x3a0/0xae0
[  122.565034] 000:  do_vfs_ioctl+0xbc/0x910
[  122.565036] 000:  ksys_ioctl+0x78/0xa8
[  122.565038] 000:  __arm64_sys_ioctl+0x1c/0x28
[  122.565040] 000:  el0_svc_common.constprop.0+0x90/0x188
[  122.565042] 000:  el0_svc_handler+0x28/0x78
[  122.565045] 000:  el0_svc+0x8/0xc
[  122.565048] 000: Code: 88107c31 35b0 d65f03c0 f9800031 (885f7c31)
[  122.565052] 000: ---[ end trace 0005 ]---
[  122.565060] 000: note: kvm-vcpu-1[1430] exited with preempt_count 1

The first problem "BUG: sleeping function called from invalid context at 
kernel/locking/rtmutex.c:968" seems to be related to RT-specific commit 
d628c3c56cab "hrtimer: Introduce expiry spin lock".


From my understanding, the problem is that hrtimer_cancel() is called from a 
preempt notifier and therefore preemption will be disabled. The patch mentioned 
above will actually require hrtimer_cancel() to be called from a preemptible context.


Do you have any thoughts how the problem should be addressed?

The second problem seems to hint that migrate_enable() was called on a task not 
pinned (-1). This will result in dereferencing an invalid value. I need to 
investigate how this can happen.


Looking at the other RT tree, I think 5.0 RT now has the same problem.

Cheers,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v3 01/15] arm64/mm: Introduce asid_info structure and move asid_generation/asid_map to it

2019-07-24 Thread Julien Grall
In an attempt to make the ASID allocator generic, create a new structure
asid_info to store all the information necessary for the allocator.

For now, move the variables asid_generation and asid_map to the new structure
asid_info. Follow-up patches will move more variables.

Note that, to avoid more renaming afterwards, a local variable 'info' has been
created; it is a pointer to the ASID allocator structure.

Signed-off-by: Julien Grall 

---
Changes in v2:
- Turn asid_info into a static variable
---
 arch/arm64/mm/context.c | 46 ++
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index b5e329fde2dd..b0789f30d03b 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -19,8 +19,11 @@
 static u32 asid_bits;
 static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 
-static atomic64_t asid_generation;
-static unsigned long *asid_map;
+static struct asid_info
+{
+   atomic64_t  generation;
+   unsigned long   *map;
+} asid_info;
 
 static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
@@ -77,13 +80,13 @@ void verify_cpu_asid_bits(void)
}
 }
 
-static void flush_context(void)
+static void flush_context(struct asid_info *info)
 {
int i;
u64 asid;
 
/* Update the list of reserved ASIDs and the ASID bitmap. */
-   bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
+   bitmap_clear(info->map, 0, NUM_USER_ASIDS);
 
for_each_possible_cpu(i) {
asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
@@ -96,7 +99,7 @@ static void flush_context(void)
 */
if (asid == 0)
asid = per_cpu(reserved_asids, i);
-   __set_bit(asid2idx(asid), asid_map);
+   __set_bit(asid2idx(asid), info->map);
per_cpu(reserved_asids, i) = asid;
}
 
@@ -131,11 +134,11 @@ static bool check_update_reserved_asid(u64 asid, u64 
newasid)
return hit;
 }
 
-static u64 new_context(struct mm_struct *mm)
+static u64 new_context(struct asid_info *info, struct mm_struct *mm)
 {
static u32 cur_idx = 1;
u64 asid = atomic64_read(&mm->context.id);
-   u64 generation = atomic64_read(&asid_generation);
+   u64 generation = atomic64_read(&info->generation);
 
if (asid != 0) {
u64 newasid = generation | (asid & ~ASID_MASK);
@@ -151,7 +154,7 @@ static u64 new_context(struct mm_struct *mm)
 * We had a valid ASID in a previous life, so try to re-use
 * it if possible.
 */
-   if (!__test_and_set_bit(asid2idx(asid), asid_map))
+   if (!__test_and_set_bit(asid2idx(asid), info->map))
return newasid;
}
 
@@ -162,20 +165,20 @@ static u64 new_context(struct mm_struct *mm)
 * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
 * pairs.
 */
-   asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
+   asid = find_next_zero_bit(info->map, NUM_USER_ASIDS, cur_idx);
if (asid != NUM_USER_ASIDS)
goto set_asid;
 
/* We're out of ASIDs, so increment the global generation count */
generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION,
-&asid_generation);
-   flush_context();
+&info->generation);
+   flush_context(info);
 
/* We have more ASIDs than CPUs, so this will always succeed */
-   asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
+   asid = find_next_zero_bit(info->map, NUM_USER_ASIDS, 1);
 
 set_asid:
-   __set_bit(asid, asid_map);
+   __set_bit(asid, info->map);
cur_idx = asid;
return idx2asid(asid) | generation;
 }
@@ -184,6 +187,7 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
 {
unsigned long flags;
u64 asid, old_active_asid;
+   struct asid_info *info = &asid_info;
 
if (system_supports_cnp())
cpu_set_reserved_ttbr0();
@@ -206,7 +210,7 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
 */
old_active_asid = atomic64_read(&per_cpu(active_asids, cpu));
if (old_active_asid &&
-   !((asid ^ atomic64_read(&asid_generation)) >> asid_bits) &&
+   !((asid ^ atomic64_read(&info->generation)) >> asid_bits) &&
atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu),
 old_active_asid, asid))
goto switch_mm_fastpath;
@@ -214,8 +218,8 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
raw_spin_lock_irqs

[PATCH v3 00/15] kvm/arm: Align the VMID allocation with the arm64 ASID one

2019-07-24 Thread Julien Grall
Hi all,

This patch series moves the ASID allocator out into a separate file in order
to re-use it for the VMID. The benefits are:
- CPUs are not forced to exit on a roll-over.
- Context invalidation is now per-CPU rather than
  broadcasted.

There is no performance regression on the fast path for ASID allocation.
Actually, on the hackbench measurement (300 hackbench) it was 0.7% faster.

The measurement was made on a Seattle-based SoC (8 CPUs), with the
number of VMIDs limited to 4 bits. The test involves running 40 guests
with 2 vCPUs concurrently. Each guest will then execute hackbench 5 times
before exiting.

The performance difference (on 5.1-rc1) between the current algo and the
new one are:
- 2.5% less exit from the guest
- 22.4% more flush, although they are now local rather than broadcasted
- 0.11% faster (just for the record)

The ASID allocator rework to make it generic has been divided into multiple
patches to make the review easier.

A branch with the patch based on 5.3-rc1 can be found:

http://xenbits.xen.org/gitweb/?p=people/julieng/linux-arm.git;a=shortlog;h=refs/heads/vmid-rework/v3

For all the changes, see each patch.

Best regards,

Cc: Russell King 

Julien Grall (15):
  arm64/mm: Introduce asid_info structure and move
asid_generation/asid_map to it
  arm64/mm: Move active_asids and reserved_asids to asid_info
  arm64/mm: Move bits to asid_info
  arm64/mm: Move the variable lock and tlb_flush_pending to asid_info
  arm64/mm: Remove dependency on MM in new_context
  arm64/mm: Store the number of asid allocated per context
  arm64/mm: Introduce NUM_ASIDS
  arm64/mm: Split asid_inits in 2 parts
  arm64/mm: Split the function check_and_switch_context in 3 parts
  arm64/mm: Introduce a callback to flush the local context
  arm64: Move the ASID allocator code in a separate file
  arm64/lib: Add an helper to free memory allocated by the ASID
allocator
  arm/kvm: Introduce a new VMID allocator
  arch/arm64: Introduce a capability to tell whether 16-bit VMID is
available
  kvm/arm: Align the VMID allocation with the arm64 ASID one

 arch/arm/include/asm/kvm_asm.h|   2 +-
 arch/arm/include/asm/kvm_host.h   |   5 +-
 arch/arm/include/asm/kvm_hyp.h|   1 +
 arch/arm/include/asm/kvm_mmu.h|   3 +-
 arch/arm/include/asm/lib_asid.h   |  79 +++
 arch/arm/kvm/Makefile |   1 +
 arch/arm/kvm/hyp/tlb.c|   8 +-
 arch/arm64/include/asm/cpucaps.h  |   3 +-
 arch/arm64/include/asm/kvm_asid.h |   8 ++
 arch/arm64/include/asm/kvm_asm.h  |   2 +-
 arch/arm64/include/asm/kvm_host.h |   5 +-
 arch/arm64/include/asm/kvm_mmu.h  |   7 +-
 arch/arm64/include/asm/lib_asid.h |  79 +++
 arch/arm64/kernel/cpufeature.c|   9 ++
 arch/arm64/kvm/hyp/tlb.c  |  10 +-
 arch/arm64/lib/Makefile   |   2 +
 arch/arm64/lib/asid.c | 190 
 arch/arm64/mm/context.c   | 200 +-
 virt/kvm/arm/arm.c| 125 +---
 19 files changed, 458 insertions(+), 281 deletions(-)
 create mode 100644 arch/arm/include/asm/lib_asid.h
 create mode 100644 arch/arm64/include/asm/kvm_asid.h
 create mode 100644 arch/arm64/include/asm/lib_asid.h
 create mode 100644 arch/arm64/lib/asid.c

-- 
2.11.0



[PATCH v3 04/15] arm64/mm: Move the variable lock and tlb_flush_pending to asid_info

2019-07-24 Thread Julien Grall
The variables lock and tlb_flush_pending hold information for a given
ASID allocator. So move them to the asid_info structure.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 49fff350e12f..b50f52a09baf 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -16,8 +16,6 @@
 #include 
 #include 
 
-static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-
 static struct asid_info
 {
atomic64_t  generation;
@@ -25,6 +23,9 @@ static struct asid_info
atomic64_t __percpu *active;
u64 __percpu*reserved;
u32 bits;
+   raw_spinlock_t  lock;
+   /* Which CPU requires context flush on next call */
+   cpumask_t   flush_pending;
 } asid_info;
 
 #define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
@@ -33,8 +34,6 @@ static struct asid_info
 static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
 
-static cpumask_t tlb_flush_pending;
-
 #define ASID_MASK(info)(~GENMASK((info)->bits - 1, 0))
 #define ASID_FIRST_VERSION(info)   (1UL << ((info)->bits))
 
@@ -113,7 +112,7 @@ static void flush_context(struct asid_info *info)
 * Queue a TLB invalidation for each CPU to perform on next
 * context-switch
 */
-   cpumask_setall(&tlb_flush_pending);
+   cpumask_setall(&info->flush_pending);
 }
 
 static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
@@ -222,7 +221,7 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
 old_active_asid, asid))
goto switch_mm_fastpath;
 
-   raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+   raw_spin_lock_irqsave(&info->lock, flags);
/* Check that our ASID belongs to the current generation. */
asid = atomic64_read(&mm->context.id);
if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
@@ -230,11 +229,11 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
atomic64_set(&mm->context.id, asid);
}
 
-   if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
+   if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending))
local_flush_tlb_all();
 
atomic64_set(&active_asid(info, cpu), asid);
-   raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+   raw_spin_unlock_irqrestore(&info->lock, flags);
 
 switch_mm_fastpath:
 
@@ -277,6 +276,8 @@ static int asids_init(void)
info->active = &active_asids;
info->reserved = &reserved_asids;
 
+   raw_spin_lock_init(&info->lock);
+
pr_info("ASID allocator initialised with %lu entries\n",
NUM_USER_ASIDS(info));
return 0;
-- 
2.11.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
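
The deferred-flush scheme that the lock and flush_pending fields implement can
be modelled in isolation: a rollover marks every CPU as needing a flush, and
each CPU clears its own bit and flushes locally the next time it takes the
slow path. The sketch below is a stand-alone toy (a fixed-size array instead
of cpumask_t, no locking), with purely illustrative names.

#include <stdbool.h>
#include <stdio.h>

#define TOY_NR_CPUS     4

static bool flush_pending[TOY_NR_CPUS];

/* Rollover: instead of broadcasting a TLB flush, just mark every CPU. */
static void toy_rollover(void)
{
        for (int cpu = 0; cpu < TOY_NR_CPUS; cpu++)
                flush_pending[cpu] = true;
}

/* Slow path on @cpu: perform the local flush only if one is pending. */
static void toy_slow_path(int cpu)
{
        if (flush_pending[cpu]) {
                flush_pending[cpu] = false;
                printf("cpu%d: local TLB flush\n", cpu);
        } else {
                printf("cpu%d: no flush needed\n", cpu);
        }
}

int main(void)
{
        toy_rollover();
        toy_slow_path(0);
        toy_slow_path(0);       /* second pass: bit already cleared */
        toy_slow_path(2);
        return 0;
}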


[PATCH v3 02/15] arm64/mm: Move active_asids and reserved_asids to asid_info

2019-07-24 Thread Julien Grall
The variables active_asids and reserved_asids hold information for a
given ASID allocator. So move them to the structure asid_info.

At the same time, introduce wrappers to access the active and reserved
ASIDs to make the code clearer.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 34 ++
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index b0789f30d03b..3de028803284 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -23,10 +23,16 @@ static struct asid_info
 {
atomic64_t  generation;
unsigned long   *map;
+   atomic64_t __percpu *active;
+   u64 __percpu*reserved;
 } asid_info;
 
+#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
+#define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu)
+
 static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
+
 static cpumask_t tlb_flush_pending;
 
 #define ASID_MASK  (~GENMASK(asid_bits - 1, 0))
@@ -89,7 +95,7 @@ static void flush_context(struct asid_info *info)
bitmap_clear(info->map, 0, NUM_USER_ASIDS);
 
for_each_possible_cpu(i) {
-   asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
+   asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
/*
 * If this CPU has already been through a
 * rollover, but hasn't run another task in
@@ -98,9 +104,9 @@ static void flush_context(struct asid_info *info)
 * the process it is still running.
 */
if (asid == 0)
-   asid = per_cpu(reserved_asids, i);
+   asid = reserved_asid(info, i);
__set_bit(asid2idx(asid), info->map);
-   per_cpu(reserved_asids, i) = asid;
+   reserved_asid(info, i) = asid;
}
 
/*
@@ -110,7 +116,8 @@ static void flush_context(struct asid_info *info)
cpumask_setall(&tlb_flush_pending);
 }
 
-static bool check_update_reserved_asid(u64 asid, u64 newasid)
+static bool check_update_reserved_asid(struct asid_info *info, u64 asid,
+  u64 newasid)
 {
int cpu;
bool hit = false;
@@ -125,9 +132,9 @@ static bool check_update_reserved_asid(u64 asid, u64 
newasid)
 * generation.
 */
for_each_possible_cpu(cpu) {
-   if (per_cpu(reserved_asids, cpu) == asid) {
+   if (reserved_asid(info, cpu) == asid) {
hit = true;
-   per_cpu(reserved_asids, cpu) = newasid;
+   reserved_asid(info, cpu) = newasid;
}
}
 
@@ -147,7 +154,7 @@ static u64 new_context(struct asid_info *info, struct 
mm_struct *mm)
 * If our current ASID was active during a rollover, we
 * can continue to use it and this was just a false alarm.
 */
-   if (check_update_reserved_asid(asid, newasid))
+   if (check_update_reserved_asid(info, asid, newasid))
return newasid;
 
/*
@@ -196,8 +203,8 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
 
/*
 * The memory ordering here is subtle.
-* If our active_asids is non-zero and the ASID matches the current
-* generation, then we update the active_asids entry with a relaxed
+* If our active_asid is non-zero and the ASID matches the current
+* generation, then we update the active_asid entry with a relaxed
 * cmpxchg. Racing with a concurrent rollover means that either:
 *
 * - We get a zero back from the cmpxchg and end up waiting on the
@@ -208,10 +215,10 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
 *   relaxed xchg in flush_context will treat us as reserved
 *   because atomic RmWs are totally ordered for a given location.
 */
-   old_active_asid = atomic64_read(&per_cpu(active_asids, cpu));
+   old_active_asid = atomic64_read(&active_asid(info, cpu));
if (old_active_asid &&
!((asid ^ atomic64_read(&info->generation)) >> asid_bits) &&
-   atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu),
+   atomic64_cmpxchg_relaxed(&active_asid(info, cpu),
 old_active_asid, asid))
goto switch_mm_fastpath;
 
@@ -226,7 +233,7 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
local_flush_tlb_all();
 
-   atomic64_set(&per_cpu(active_asids, cpu), asid);
+   atomic64_set(&active_asid(info, cpu), asid);
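
The role of the reserved ASIDs in flush_context() above can be seen with a
small stand-alone model: at rollover each CPU's active id is snapshotted and
cleared, and a zero snapshot means that CPU already went through a rollover
without running another task, so its previous reservation is kept. Names and
types below are illustrative only, not the kernel code.

#include <stdint.h>
#include <stdio.h>

#define TOY_NR_CPUS     2

static uint64_t active_id[TOY_NR_CPUS];
static uint64_t reserved_id[TOY_NR_CPUS];

/*
 * Model of flush_context(): snapshot-and-clear each CPU's active id and
 * record it as reserved so the task running there can keep using it.
 */
static void toy_flush_context(void)
{
        for (int cpu = 0; cpu < TOY_NR_CPUS; cpu++) {
                uint64_t id = active_id[cpu];

                active_id[cpu] = 0;
                /*
                 * A zero snapshot means this CPU already saw a rollover but
                 * has not run another task since: keep its old reservation.
                 */
                if (id == 0)
                        id = reserved_id[cpu];
                reserved_id[cpu] = id;
        }
}

int main(void)
{
        active_id[0] = 0x105;   /* cpu0 is running a task with id 0x105 */
        reserved_id[1] = 0x203; /* cpu1 already rolled over earlier */

        toy_flush_context();

        for (int cpu = 0; cpu < TOY_NR_CPUS; cpu++)
                printf("cpu%d reserved=%#llx\n", cpu,
                       (unsigned long long)reserved_id[cpu]);
        return 0;
}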
 

[PATCH v3 05/15] arm64/mm: Remove dependency on MM in new_context

2019-07-24 Thread Julien Grall
The function new_context will be part of a generic ASID allocator. At
the moment, the MM structure is only used to fetch the ASID.

To remove the dependency on MM, it is possible to just pass a pointer to
the current ASID.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index b50f52a09baf..dfb0da35a541 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -140,10 +140,10 @@ static bool check_update_reserved_asid(struct asid_info 
*info, u64 asid,
return hit;
 }
 
-static u64 new_context(struct asid_info *info, struct mm_struct *mm)
+static u64 new_context(struct asid_info *info, atomic64_t *pasid)
 {
static u32 cur_idx = 1;
-   u64 asid = atomic64_read(&mm->context.id);
+   u64 asid = atomic64_read(pasid);
u64 generation = atomic64_read(&info->generation);
 
if (asid != 0) {
@@ -225,7 +225,7 @@ void check_and_switch_context(struct mm_struct *mm, 
unsigned int cpu)
/* Check that our ASID belongs to the current generation. */
asid = atomic64_read(&mm->context.id);
if ((asid ^ atomic64_read(&info->generation)) >> info->bits) {
-   asid = new_context(info, mm);
+   asid = new_context(info, &mm->context.id);
atomic64_set(&mm->context.id, asid);
}
 
-- 
2.11.0



[PATCH v3 07/15] arm64/mm: Introduce NUM_ASIDS

2019-07-24 Thread Julien Grall
At the moment ASID_FIRST_VERSION is used to know the number of ASIDs
supported. As we are going to move the ASID allocator into a separate file, it
would be better to use a different name for external users.

This patch adds NUM_ASIDS and implements ASID_FIRST_VERSION using it.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 2e1e495cd1d8..3b40ac4a2541 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -37,7 +37,9 @@ static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
 
 #define ASID_MASK(info)(~GENMASK((info)->bits - 1, 0))
-#define ASID_FIRST_VERSION(info)   (1UL << ((info)->bits))
+#define NUM_ASIDS(info)(1UL << ((info)->bits))
+
+#define ASID_FIRST_VERSION(info)   NUM_ASIDS(info)
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 #define ASID_PER_CONTEXT   2
@@ -45,7 +47,7 @@ static DEFINE_PER_CPU(u64, reserved_asids);
 #define ASID_PER_CONTEXT   1
 #endif
 
-#define NUM_CTXT_ASIDS(info)   (ASID_FIRST_VERSION(info) >> 
(info)->ctxt_shift)
+#define NUM_CTXT_ASIDS(info)   (NUM_ASIDS(info) >> (info)->ctxt_shift)
 #define asid2idx(info, asid)   (((asid) & ~ASID_MASK(info)) >> 
(info)->ctxt_shift)
 #define idx2asid(info, idx)(((idx) << (info)->ctxt_shift) & 
~ASID_MASK(info))
 
-- 
2.11.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v3 03/15] arm64/mm: Move bits to asid_info

2019-07-24 Thread Julien Grall
The variable bits holds information for a given ASID allocator. So move
it to the asid_info structure.

Because most of the macros were relying on bits, they are now taking an
extra parameter that is a pointer to the asid_info structure.

Signed-off-by: Julien Grall 
---
 arch/arm64/mm/context.c | 59 +
 1 file changed, 30 insertions(+), 29 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 3de028803284..49fff350e12f 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -16,7 +16,6 @@
 #include 
 #include 
 
-static u32 asid_bits;
 static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 
 static struct asid_info
@@ -25,6 +24,7 @@ static struct asid_info
unsigned long   *map;
atomic64_t __percpu *active;
u64 __percpu*reserved;
+   u32 bits;
 } asid_info;
 
 #define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu)
@@ -35,17 +35,17 @@ static DEFINE_PER_CPU(u64, reserved_asids);
 
 static cpumask_t tlb_flush_pending;
 
-#define ASID_MASK  (~GENMASK(asid_bits - 1, 0))
-#define ASID_FIRST_VERSION (1UL << asid_bits)
+#define ASID_MASK(info)(~GENMASK((info)->bits - 1, 0))
+#define ASID_FIRST_VERSION(info)   (1UL << ((info)->bits))
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-#define NUM_USER_ASIDS (ASID_FIRST_VERSION >> 1)
-#define asid2idx(asid) (((asid) & ~ASID_MASK) >> 1)
-#define idx2asid(idx)  (((idx) << 1) & ~ASID_MASK)
+#define NUM_USER_ASIDS(info)   (ASID_FIRST_VERSION(info) >> 1)
+#define asid2idx(info, asid)   (((asid) & ~ASID_MASK(info)) >> 1)
+#define idx2asid(info, idx)(((idx) << 1) & ~ASID_MASK(info))
 #else
-#define NUM_USER_ASIDS (ASID_FIRST_VERSION)
-#define asid2idx(asid) ((asid) & ~ASID_MASK)
-#define idx2asid(idx)  asid2idx(idx)
+#define NUM_USER_ASIDS(info)   (ASID_FIRST_VERSION(info))
+#define asid2idx(info, asid)   ((asid) & ~ASID_MASK(info))
+#define idx2asid(info, idx)asid2idx(info, idx)
 #endif
 
 /* Get the ASIDBits supported by the current CPU */
@@ -75,13 +75,13 @@ void verify_cpu_asid_bits(void)
 {
u32 asid = get_cpu_asid_bits();
 
-   if (asid < asid_bits) {
+   if (asid < asid_info.bits) {
/*
 * We cannot decrease the ASID size at runtime, so panic if we 
support
 * fewer ASID bits than the boot CPU.
 */
pr_crit("CPU%d: smaller ASID size(%u) than boot CPU (%u)\n",
-   smp_processor_id(), asid, asid_bits);
+   smp_processor_id(), asid, asid_info.bits);
cpu_panic_kernel();
}
 }
@@ -92,7 +92,7 @@ static void flush_context(struct asid_info *info)
u64 asid;
 
/* Update the list of reserved ASIDs and the ASID bitmap. */
-   bitmap_clear(info->map, 0, NUM_USER_ASIDS);
+   bitmap_clear(info->map, 0, NUM_USER_ASIDS(info));
 
for_each_possible_cpu(i) {
asid = atomic64_xchg_relaxed(&active_asid(info, i), 0);
@@ -105,7 +105,7 @@ static void flush_context(struct asid_info *info)
 */
if (asid == 0)
asid = reserved_asid(info, i);
-   __set_bit(asid2idx(asid), info->map);
+   __set_bit(asid2idx(info, asid), info->map);
reserved_asid(info, i) = asid;
}
 
@@ -148,7 +148,7 @@ static u64 new_context(struct asid_info *info, struct 
mm_struct *mm)
u64 generation = atomic64_read(&info->generation);
 
if (asid != 0) {
-   u64 newasid = generation | (asid & ~ASID_MASK);
+   u64 newasid = generation | (asid & ~ASID_MASK(info));
 
/*
 * If our current ASID was active during a rollover, we
@@ -161,7 +161,7 @@ static u64 new_context(struct asid_info *info, struct 
mm_struct *mm)
 * We had a valid ASID in a previous life, so try to re-use
 * it if possible.
 */
-   if (!__test_and_set_bit(asid2idx(asid), info->map))
+   if (!__test_and_set_bit(asid2idx(info, asid), info->map))
return newasid;
}
 
@@ -172,22 +172,22 @@ static u64 new_context(struct asid_info *info, struct 
mm_struct *mm)
 * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
 * pairs.
 */
-   asid = find_next_zero_bit(info->map, NUM_USER_ASIDS, cur_idx);
-   if (asid != NUM_USER_ASIDS)
+   asid = find_next_zero_bit(info->map, NUM_USER_ASIDS(info), cur_idx);
+   if (asid != NUM_USER_ASIDS(info))
goto set_asid;
 
/* We're 
