Re: [RFT PATCH v3 01/27] arm64: Cope with CPUs stuck in VHE mode

2021-03-26 Thread Hector Martin

On 25/03/2021 05.00, Marc Zyngier wrote:

I've come up with this on top of the original patch, spitting a
warning when the right conditions are met. It's pretty ugly, but hey,
so is the HW this runs on.


[...]

Folded into v4 and tested; works fine with `kvm-arm.mode=nvhe`, throwing 
the ugly WARN.


--
Hector Martin (mar...@marcan.st)
Public Key: https://mrcn.st/pub


Re: [RFT PATCH v3 01/27] arm64: Cope with CPUs stuck in VHE mode

2021-03-24 Thread Marc Zyngier
On Wed, 24 Mar 2021 18:05:46 +,
Will Deacon  wrote:
> 
> On Fri, Mar 05, 2021 at 06:38:36AM +0900, Hector Martin wrote:
> > From: Marc Zyngier 
> > 
> > It seems that the CPU known as Apple M1 has the terrible habit
> > of being stuck with HCR_EL2.E2H==1, in violation of the architecture.
> > 
> > Try and work around this deplorable state of affairs by detecting
> > the stuck bit early and short-circuit the nVHE dance. It is still
> > unknown whether there are many more such nuggets to be found...
> > 
> > Reported-by: Hector Martin 
> > Signed-off-by: Marc Zyngier 
> > ---
> >  arch/arm64/kernel/head.S | 33 ++---
> >  arch/arm64/kernel/hyp-stub.S | 28 
> >  2 files changed, 54 insertions(+), 7 deletions(-)
> > 
> > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> > index 66b0e0b66e31..673002b11865 100644
> > --- a/arch/arm64/kernel/head.S
> > +++ b/arch/arm64/kernel/head.S
> > @@ -477,14 +477,13 @@ EXPORT_SYMBOL(kimage_vaddr)
> >   * booted in EL1 or EL2 respectively.
> >   */
> >  SYM_FUNC_START(init_kernel_el)
> > -   mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
> > -   msr sctlr_el1, x0
> > -
> > mrs x0, CurrentEL
> > cmp x0, #CurrentEL_EL2
> > b.eqinit_el2
> >  
> >  SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
> > +   mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
> > +   msr sctlr_el1, x0
> > isb
> > mov_q   x0, INIT_PSTATE_EL1
> > msr spsr_el1, x0
> > @@ -504,6 +503,34 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
> > msr vbar_el2, x0
> > isb
> >  
> > +   /*
> > +* Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
> > +* making it impossible to start in nVHE mode. Is that
> > +* compliant with the architecture? Absolutely not!
> > +*/
> > +   mrs x0, hcr_el2
> > +   and x0, x0, #HCR_E2H
> > +   cbz x0, 1f
> > +
> > +   /* Switching to VHE requires a sane SCTLR_EL1 as a start */
> > +   mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
> > +   msr_s   SYS_SCTLR_EL12, x0
> > +
> > +   /*
> > +* Force an eret into a helper "function", and let it return
> > +* to our original caller... This makes sure that we have
> > +* initialised the basic PSTATE state.
> > +*/
> > +   mov x0, #INIT_PSTATE_EL2
> > +   msr spsr_el1, x0
> > +   adr_l   x0, stick_to_vhe
> > +   msr elr_el1, x0
> > +   eret
> > +
> > +1:
> > +   mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
> > +   msr sctlr_el1, x0
> > +
> > msr elr_el2, lr
> > mov w0, #BOOT_CPU_MODE_EL2
> > eret
> > diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
> > index 5eccbd62fec8..c7601030ee82 100644
> > --- a/arch/arm64/kernel/hyp-stub.S
> > +++ b/arch/arm64/kernel/hyp-stub.S
> > @@ -27,12 +27,12 @@ SYM_CODE_START(__hyp_stub_vectors)
> > ventry  el2_fiq_invalid // FIQ EL2t
> > ventry  el2_error_invalid   // Error EL2t
> >  
> > -   ventry  el2_sync_invalid// Synchronous EL2h
> > +   ventry  elx_sync// Synchronous EL2h
> > ventry  el2_irq_invalid // IRQ EL2h
> > ventry  el2_fiq_invalid // FIQ EL2h
> > ventry  el2_error_invalid   // Error EL2h
> >  
> > -   ventry  el1_sync// Synchronous 64-bit EL1
> > +   ventry  elx_sync// Synchronous 64-bit EL1
> > ventry  el1_irq_invalid // IRQ 64-bit EL1
> > ventry  el1_fiq_invalid // FIQ 64-bit EL1
> > ventry  el1_error_invalid   // Error 64-bit EL1
> > @@ -45,7 +45,7 @@ SYM_CODE_END(__hyp_stub_vectors)
> >  
> > .align 11
> >  
> > -SYM_CODE_START_LOCAL(el1_sync)
> > +SYM_CODE_START_LOCAL(elx_sync)
> > cmp x0, #HVC_SET_VECTORS
> > b.ne1f
> > msr vbar_el2, x1
> > @@ -71,7 +71,7 @@ SYM_CODE_START_LOCAL(el1_sync)
> >  
> >  9: mov x0, xzr
> > eret
> > -SYM_CODE_END(el1_sync)
> > +SYM_CODE_END(elx_sync)
> >  
> >  // nVHE? No way! Give me the real thing!
> >  SYM_CODE_START_LOCAL(mutate_to_vhe)
> > @@ -243,3 +243,23 @@ SYM_FUNC_START(switch_to_vhe)
> >  #endif
> > ret
> >  SYM_FUNC_END(switch_to_vhe)
> > +
> > +SYM_FUNC_START(stick_to_vhe)
> > +   /*
> > +* Make sure the switch to VHE cannot fail, by overriding the
> > +* override. This is hilarious.
> > +*/
> > +   adr_l   x1, id_aa64mmfr1_override
> > +   add x1, x1, #FTR_OVR_MASK_OFFSET
> > +   dc  civac, x1
> > +   dsb sy
> > +   isb
> 
> Why do we need an ISB here?

Hmmm. Probably me being paranoid and trying to come up with something
for Hector to test before I had access to the HW. The DSB is more than
enough to order CMO and load/store.

> > +   ldr x0, [x1]
> > +   bic x0, x0, #(0xf << ID_AA64MMFR1_VHE_SHIFT)
> > +   str x0, [x1]
> 
> I find it a bit bizarre doing this here, as for the primary CPU we're still
> a way away from parsing the early paramaters and for secondary 

Re: [RFT PATCH v3 01/27] arm64: Cope with CPUs stuck in VHE mode

2021-03-24 Thread Will Deacon
On Fri, Mar 05, 2021 at 06:38:36AM +0900, Hector Martin wrote:
> From: Marc Zyngier 
> 
> It seems that the CPU known as Apple M1 has the terrible habit
> of being stuck with HCR_EL2.E2H==1, in violation of the architecture.
> 
> Try and work around this deplorable state of affairs by detecting
> the stuck bit early and short-circuit the nVHE dance. It is still
> unknown whether there are many more such nuggets to be found...
> 
> Reported-by: Hector Martin 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm64/kernel/head.S | 33 ++---
>  arch/arm64/kernel/hyp-stub.S | 28 
>  2 files changed, 54 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 66b0e0b66e31..673002b11865 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -477,14 +477,13 @@ EXPORT_SYMBOL(kimage_vaddr)
>   * booted in EL1 or EL2 respectively.
>   */
>  SYM_FUNC_START(init_kernel_el)
> - mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
> - msr sctlr_el1, x0
> -
>   mrs x0, CurrentEL
>   cmp x0, #CurrentEL_EL2
>   b.eqinit_el2
>  
>  SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
> + mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
> + msr sctlr_el1, x0
>   isb
>   mov_q   x0, INIT_PSTATE_EL1
>   msr spsr_el1, x0
> @@ -504,6 +503,34 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
>   msr vbar_el2, x0
>   isb
>  
> + /*
> +  * Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
> +  * making it impossible to start in nVHE mode. Is that
> +  * compliant with the architecture? Absolutely not!
> +  */
> + mrs x0, hcr_el2
> + and x0, x0, #HCR_E2H
> + cbz x0, 1f
> +
> + /* Switching to VHE requires a sane SCTLR_EL1 as a start */
> + mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
> + msr_s   SYS_SCTLR_EL12, x0
> +
> + /*
> +  * Force an eret into a helper "function", and let it return
> +  * to our original caller... This makes sure that we have
> +  * initialised the basic PSTATE state.
> +  */
> + mov x0, #INIT_PSTATE_EL2
> + msr spsr_el1, x0
> + adr_l   x0, stick_to_vhe
> + msr elr_el1, x0
> + eret
> +
> +1:
> + mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
> + msr sctlr_el1, x0
> +
>   msr elr_el2, lr
>   mov w0, #BOOT_CPU_MODE_EL2
>   eret
> diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
> index 5eccbd62fec8..c7601030ee82 100644
> --- a/arch/arm64/kernel/hyp-stub.S
> +++ b/arch/arm64/kernel/hyp-stub.S
> @@ -27,12 +27,12 @@ SYM_CODE_START(__hyp_stub_vectors)
>   ventry  el2_fiq_invalid // FIQ EL2t
>   ventry  el2_error_invalid   // Error EL2t
>  
> - ventry  el2_sync_invalid// Synchronous EL2h
> + ventry  elx_sync// Synchronous EL2h
>   ventry  el2_irq_invalid // IRQ EL2h
>   ventry  el2_fiq_invalid // FIQ EL2h
>   ventry  el2_error_invalid   // Error EL2h
>  
> - ventry  el1_sync// Synchronous 64-bit EL1
> + ventry  elx_sync// Synchronous 64-bit EL1
>   ventry  el1_irq_invalid // IRQ 64-bit EL1
>   ventry  el1_fiq_invalid // FIQ 64-bit EL1
>   ventry  el1_error_invalid   // Error 64-bit EL1
> @@ -45,7 +45,7 @@ SYM_CODE_END(__hyp_stub_vectors)
>  
>   .align 11
>  
> -SYM_CODE_START_LOCAL(el1_sync)
> +SYM_CODE_START_LOCAL(elx_sync)
>   cmp x0, #HVC_SET_VECTORS
>   b.ne1f
>   msr vbar_el2, x1
> @@ -71,7 +71,7 @@ SYM_CODE_START_LOCAL(el1_sync)
>  
>  9:   mov x0, xzr
>   eret
> -SYM_CODE_END(el1_sync)
> +SYM_CODE_END(elx_sync)
>  
>  // nVHE? No way! Give me the real thing!
>  SYM_CODE_START_LOCAL(mutate_to_vhe)
> @@ -243,3 +243,23 @@ SYM_FUNC_START(switch_to_vhe)
>  #endif
>   ret
>  SYM_FUNC_END(switch_to_vhe)
> +
> +SYM_FUNC_START(stick_to_vhe)
> + /*
> +  * Make sure the switch to VHE cannot fail, by overriding the
> +  * override. This is hilarious.
> +  */
> + adr_l   x1, id_aa64mmfr1_override
> + add x1, x1, #FTR_OVR_MASK_OFFSET
> + dc  civac, x1
> + dsb sy
> + isb

Why do we need an ISB here?

> + ldr x0, [x1]
> + bic x0, x0, #(0xf << ID_AA64MMFR1_VHE_SHIFT)
> + str x0, [x1]

I find it a bit bizarre doing this here, as for the primary CPU we're still
a way away from parsing the early paramaters and for secondary CPUs this
doesn't need to be done for each one. Furthermore, this same code is run
on the resume path, which can probably then race with itself.

Is it possible to do it later on the boot CPU only, e.g. in
init_feature_override()? We should probably also log a warning that we're
ignoring the option because nVHE is not available.

Will


[RFT PATCH v3 01/27] arm64: Cope with CPUs stuck in VHE mode

2021-03-04 Thread Hector Martin
From: Marc Zyngier 

It seems that the CPU known as Apple M1 has the terrible habit
of being stuck with HCR_EL2.E2H==1, in violation of the architecture.

Try and work around this deplorable state of affairs by detecting
the stuck bit early and short-circuit the nVHE dance. It is still
unknown whether there are many more such nuggets to be found...

Reported-by: Hector Martin 
Signed-off-by: Marc Zyngier 
---
 arch/arm64/kernel/head.S | 33 ++---
 arch/arm64/kernel/hyp-stub.S | 28 
 2 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 66b0e0b66e31..673002b11865 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -477,14 +477,13 @@ EXPORT_SYMBOL(kimage_vaddr)
  * booted in EL1 or EL2 respectively.
  */
 SYM_FUNC_START(init_kernel_el)
-   mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
-   msr sctlr_el1, x0
-
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
b.eqinit_el2
 
 SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
+   mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
+   msr sctlr_el1, x0
isb
mov_q   x0, INIT_PSTATE_EL1
msr spsr_el1, x0
@@ -504,6 +503,34 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
msr vbar_el2, x0
isb
 
+   /*
+* Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
+* making it impossible to start in nVHE mode. Is that
+* compliant with the architecture? Absolutely not!
+*/
+   mrs x0, hcr_el2
+   and x0, x0, #HCR_E2H
+   cbz x0, 1f
+
+   /* Switching to VHE requires a sane SCTLR_EL1 as a start */
+   mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
+   msr_s   SYS_SCTLR_EL12, x0
+
+   /*
+* Force an eret into a helper "function", and let it return
+* to our original caller... This makes sure that we have
+* initialised the basic PSTATE state.
+*/
+   mov x0, #INIT_PSTATE_EL2
+   msr spsr_el1, x0
+   adr_l   x0, stick_to_vhe
+   msr elr_el1, x0
+   eret
+
+1:
+   mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
+   msr sctlr_el1, x0
+
msr elr_el2, lr
mov w0, #BOOT_CPU_MODE_EL2
eret
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 5eccbd62fec8..c7601030ee82 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -27,12 +27,12 @@ SYM_CODE_START(__hyp_stub_vectors)
ventry  el2_fiq_invalid // FIQ EL2t
ventry  el2_error_invalid   // Error EL2t
 
-   ventry  el2_sync_invalid// Synchronous EL2h
+   ventry  elx_sync// Synchronous EL2h
ventry  el2_irq_invalid // IRQ EL2h
ventry  el2_fiq_invalid // FIQ EL2h
ventry  el2_error_invalid   // Error EL2h
 
-   ventry  el1_sync// Synchronous 64-bit EL1
+   ventry  elx_sync// Synchronous 64-bit EL1
ventry  el1_irq_invalid // IRQ 64-bit EL1
ventry  el1_fiq_invalid // FIQ 64-bit EL1
ventry  el1_error_invalid   // Error 64-bit EL1
@@ -45,7 +45,7 @@ SYM_CODE_END(__hyp_stub_vectors)
 
.align 11
 
-SYM_CODE_START_LOCAL(el1_sync)
+SYM_CODE_START_LOCAL(elx_sync)
cmp x0, #HVC_SET_VECTORS
b.ne1f
msr vbar_el2, x1
@@ -71,7 +71,7 @@ SYM_CODE_START_LOCAL(el1_sync)
 
 9: mov x0, xzr
eret
-SYM_CODE_END(el1_sync)
+SYM_CODE_END(elx_sync)
 
 // nVHE? No way! Give me the real thing!
 SYM_CODE_START_LOCAL(mutate_to_vhe)
@@ -243,3 +243,23 @@ SYM_FUNC_START(switch_to_vhe)
 #endif
ret
 SYM_FUNC_END(switch_to_vhe)
+
+SYM_FUNC_START(stick_to_vhe)
+   /*
+* Make sure the switch to VHE cannot fail, by overriding the
+* override. This is hilarious.
+*/
+   adr_l   x1, id_aa64mmfr1_override
+   add x1, x1, #FTR_OVR_MASK_OFFSET
+   dc  civac, x1
+   dsb sy
+   isb
+   ldr x0, [x1]
+   bic x0, x0, #(0xf << ID_AA64MMFR1_VHE_SHIFT)
+   str x0, [x1]
+
+   mov x0, #HVC_VHE_RESTART
+   hvc #0
+   mov x0, #BOOT_CPU_MODE_EL2
+   ret
+SYM_FUNC_END(stick_to_vhe)
-- 
2.30.0