[PATCH v3 20/20] KVM: arm64: Take any host SError before entering the guest

2017-10-05 Thread James Morse
On VHE systems KVM masks SError before switching the VBAR value. Any
host RAS error that the CPU knew about before world-switch may become
pending as an SError during world-switch, and only be taken once we enter
the guest.

Until KVM can take RAS SErrors during world switch, add an ESB to
force any RAS errors to be synchronised and taken on the host before
we enter world switch.

RAS errors that become pending during world switch are still taken
once we enter the guest.
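
For reference, the VHE entry helper ends up looking like this (a sketch of the
hunk below with explanatory comments added; esb() and local_mask_daif() come
from earlier patches in this series):

    static inline void kvm_arm_vhe_guest_enter(void)
    {
        /*
         * Synchronise and take any pending host SError now, while the
         * host's VBAR and SError handler are still in place...
         */
        esb();

        /* ...then mask everything before KVM switches VBAR for the guest. */
        local_mask_daif();
    }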

Signed-off-by: James Morse 
---
 arch/arm64/include/asm/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index b3c53d8b6a60..6a48f9d18c58 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -392,6 +392,7 @@ static inline void __cpu_init_stage2(void)
 
 static inline void kvm_arm_vhe_guest_enter(void)
 {
+   esb();
local_mask_daif();
 }
 
-- 
2.13.3



[PATCH v3 19/20] KVM: arm64: Handle RAS SErrors from EL2 on guest exit

2017-10-05 Thread James Morse
We expect to have firmware-first handling of RAS SErrors, with errors
notified via an APEI method. For systems without firmware-first, add
some minimal handling to KVM.

There are two ways KVM can take an SError due to a guest, and either may be a
RAS error: we exit the guest due to an SError routed to EL2 by HCR_EL2.AMO,
or we take an SError from EL2 when we unmask PSTATE.A from __guest_exit.

The current SError from EL2 code unmasks SError and tries to fence any
pending SError into a single instruction window. It then leaves SError
unmasked.

With the v8.2 RAS Extensions we may take an SError for a 'corrected'
error, but KVM is only able to handle SErrors from EL2 if they occur
during this single-instruction window...

The RAS Extensions give us a new instruction to synchronise and
consume SErrors. The RAS Extensions document (ARM DDI0587),
'2.4.1 ESB and Unrecoverable errors' describes ESB as synchronising
SError interrupts generated by 'instructions, translation table walks,
hardware updates to the translation tables, and instruction fetches on
the same PE'. This makes ESB equivalent to KVM's existing
'dsb, msr-daifclr, isb' sequence.
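
Roughly, the two sequences compare like this (an illustrative sketch only; the
real change is in the entry.S hunk below):

    // Existing approach: open a single-instruction window for the SError
    dsb   sy
    msr   daifclr, #4          // unmask SError
    isb                        // a pending SError is taken here
    // With the RAS Extensions: synchronise and defer it instead
    esb                        // any pending SError is deferred into DISR_EL1
    mrs_s x2, SYS_DISR_EL1     // non-zero means an error was consumed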

Use the alternatives to synchronise and consume any SError using ESB
instead of unmasking and taking the SError. Set ARM_EXIT_WITH_SERROR_BIT
in the exit_code so that we can restart the vcpu if it turns out this
SError has no impact on the vcpu.

Signed-off-by: James Morse 
---
 arch/arm64/include/asm/kvm_emulate.h |  5 +
 arch/arm64/include/asm/kvm_host.h|  1 +
 arch/arm64/kernel/asm-offsets.c  |  1 +
 arch/arm64/kvm/handle_exit.c | 10 +-
 arch/arm64/kvm/hyp/entry.S   | 13 +
 5 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h 
b/arch/arm64/include/asm/kvm_emulate.h
index 8a7a838eb17a..8274d16df3cd 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -173,6 +173,11 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(const 
struct kvm_vcpu *vcpu)
return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
 }
 
+static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu)
+{
+   return vcpu->arch.fault.disr_el1;
+}
+
 static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu)
 {
return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK;
diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 9564e6d56b79..b3c53d8b6a60 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -89,6 +89,7 @@ struct kvm_vcpu_fault_info {
u32 esr_el2;/* Hyp Syndrom Register */
u64 far_el2;/* Hyp Fault Address Register */
u64 hpfar_el2;  /* Hyp IPA Fault Address Register */
+   u64 disr_el1;   /* Deferred [SError] Status Register */
 };
 
 /*
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 71bf088f1e4b..121889c49542 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -130,6 +130,7 @@ int main(void)
   BLANK();
 #ifdef CONFIG_KVM_ARM_HOST
   DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
+  DEFINE(VCPU_FAULT_DISR,  offsetof(struct kvm_vcpu, arch.fault.disr_el1));
   DEFINE(CPU_GP_REGS,  offsetof(struct kvm_cpu_context, gp_regs));
   DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
   DEFINE(CPU_FP_REGS,  offsetof(struct kvm_regs, fp_regs));
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 345fdbba6c2e..e1e6cfe7d4d9 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -23,6 +23,7 @@
 #include 
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -208,7 +209,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
*vcpu_pc(vcpu) -= adj;
}
 
-   kvm_inject_vabt(vcpu);
+   if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) {
+   u64 disr = kvm_vcpu_get_disr(vcpu);
+
+   kvm_handle_guest_serror(vcpu, disr_to_esr(disr));
+   } else {
+   kvm_inject_vabt(vcpu);
+   }
+
return 1;
}
 
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 12ee62d6d410..96caa5328b3a 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -124,6 +124,17 @@ ENTRY(__guest_exit)
// Now restore the host regs
restore_callee_saved_regs x2
 
+alternative_if ARM64_HAS_RAS_EXTN
+   // If we have the RAS extensions we can consume a pending error
+   // without an unmask-SError and isb.
+   esb
+   mrs_s   x2, SYS_DISR_EL1
+   str x2, [x1, #(VCPU_FAULT_DISR - VCPU_CONTEXT)]
+   cbz x2, 1f
+   msr_s   SYS_DISR_EL1, xzr
+   orr x0, x0, 

[PATCH v3 18/20] KVM: arm64: Handle RAS SErrors from EL1 on guest exit

2017-10-05 Thread James Morse
We expect to have firmware-first handling of RAS SErrors, with errors
notified via an APEI method. For systems without firmware-first, add
some minimal handling to KVM.

There are two ways KVM can take an SError due to a guest, and either may be a
RAS error: we exit the guest due to an SError routed to EL2 by HCR_EL2.AMO,
or we take an SError from EL2 when we unmask PSTATE.A from __guest_exit.

For SError that interrupt a guest and are routed to EL2 the existing
behaviour is to inject an impdef SError into the guest.

Add code to handle RAS SErrors based on the ESR. For uncontained errors
arm64_is_blocking_ras_serror() will panic(); these errors compromise
the host too. All other error types are contained: for the 'blocking'
errors the vCPU can't make progress, so we inject a virtual SError.
We ignore contained errors where we can make progress: if we're lucky,
we may not hit them again.
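
Put another way, the handler added below reduces to this decision (the same
code as the hunk, with comments added for illustration; the helpers come from
the earlier RAS SError patches in this series):

    static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr)
    {
        /*
         * Uncontained errors never reach this point: the arch helper
         * panic()s for those. If this isn't a RAS SError at all, or it is
         * a 'blocking' error the vCPU can't make progress past, inject a
         * virtual SError. Contained errors we can step over are ignored -
         * with luck we won't see them again.
         */
        if (!arm64_is_ras_serror(esr) || arm64_blocking_ras_serror(NULL, esr))
            kvm_inject_vabt(vcpu);
    }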

Signed-off-by: James Morse 
---
 arch/arm64/kvm/handle_exit.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 7debb74843a0..345fdbba6c2e 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -28,12 +28,19 @@
 #include 
 #include 
 #include 
+#include 
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
 typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
 
+static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr)
+{
+   if (!arm64_is_ras_serror(esr) || arm64_blocking_ras_serror(NULL, esr))
+   kvm_inject_vabt(vcpu);
+}
+
 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
int ret;
@@ -211,7 +218,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
case ARM_EXCEPTION_IRQ:
return 1;
case ARM_EXCEPTION_EL1_SERROR:
-   kvm_inject_vabt(vcpu);
+   kvm_handle_guest_serror(vcpu, kvm_vcpu_get_hsr(vcpu));
return 1;
case ARM_EXCEPTION_TRAP:
/*
-- 
2.13.3



[PATCH v3 14/20] arm64: kernel: Prepare for a DISR user

2017-10-05 Thread James Morse
KVM would like to consume any pending SError (or RAS error) after guest
exit. Today it has to unmask SError and use dsb+isb to synchronise the
CPU. With the RAS extensions we can use ESB to synchronise any pending
SError.

Add the necessary macros to allow DISR to be read and converted to an
ESR.

We clear the DISR register when we enable the RAS cpufeature. While the
kernel has issued ESB instructions before this point, it has only done so
with SError unmasked, so any value we find in DISR must have belonged to
firmware. Executing an ESB instruction is the only way to update DISR,
so we can expect firmware to have handled any deferred SError. By the
same logic we clear DISR in the idle path.
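
For illustration, a DISR user is expected to follow a pattern like this (a
sketch, not code from this patch; read_sysreg_s()/write_sysreg_s() are the
existing sysreg accessors):

    u64 disr;
    u32 esr;

    esb();                                  /* synchronise: defer any pending SError */
    disr = read_sysreg_s(SYS_DISR_EL1);     /* zero means nothing was deferred */
    if (disr) {
        write_sysreg_s(0, SYS_DISR_EL1);    /* consume it */
        esr = disr_to_esr(disr);            /* ESR_ELx-style syndrome */
        /* ... hand 'esr' to the SError/RAS handling code ... */
    }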

Signed-off-by: James Morse 
---
 arch/arm64/include/asm/assembler.h |  7 +++
 arch/arm64/include/asm/esr.h   |  7 +++
 arch/arm64/include/asm/exception.h | 14 ++
 arch/arm64/include/asm/processor.h |  1 +
 arch/arm64/include/asm/sysreg.h|  1 +
 arch/arm64/kernel/cpufeature.c |  9 +
 arch/arm64/mm/proc.S   |  5 +
 7 files changed, 44 insertions(+)

diff --git a/arch/arm64/include/asm/assembler.h 
b/arch/arm64/include/asm/assembler.h
index 7ffb2a629dc9..d4c0adf792ee 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -108,6 +108,13 @@
.endm
 
 /*
+ * RAS Error Synchronization barrier
+ */
+   .macro  esb
+   hint#16
+   .endm
+
+/*
  * NOP sequence
  */
.macro  nops, num
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 8ea52f15bf1c..b3f17fd18b14 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -136,6 +136,13 @@
 #define ESR_ELx_WFx_ISS_WFE(UL(1) << 0)
 #define ESR_ELx_xVC_IMM_MASK   ((1UL << 16) - 1)
 
+#define DISR_EL1_IDS   (UL(1) << 24)
+/*
+ * DISR_EL1 and ESR_ELx share the bottom 13 bits, but the RES0 bits may mean
+ * different things in the future...
+ */
+#define DISR_EL1_ESR_MASK  (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC)
+
 /* ESR value templates for specific events */
 
 /* BRK instruction trap from AArch64 state */
diff --git a/arch/arm64/include/asm/exception.h 
b/arch/arm64/include/asm/exception.h
index 0c2eec490abf..bc30429d8e91 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -18,6 +18,8 @@
 #ifndef __ASM_EXCEPTION_H
 #define __ASM_EXCEPTION_H
 
+#include 
+
 #include 
 
 #define __exception      __attribute__((section(".exception.text")))
@@ -27,4 +29,16 @@
 #define __exception_irq_entry  __exception
 #endif
 
+static inline u32 disr_to_esr(u64 disr)
+{
+   unsigned int esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT;
+
+   if ((disr & DISR_EL1_IDS) == 0)
+   esr |= (disr & DISR_EL1_ESR_MASK);
+   else
+   esr |= (disr & ESR_ELx_ISS_MASK);
+
+   return esr;
+}
+
 #endif /* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/include/asm/processor.h 
b/arch/arm64/include/asm/processor.h
index 6b72ddc33d06..9de3f839be8a 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -194,5 +194,6 @@ static inline void spin_lock_prefetch(const void *ptr)
 int cpu_enable_pan(void *__unused);
 int cpu_enable_cache_maint_trap(void *__unused);
 int cpu_enable_iesb(void *__unused);
+int cpu_clear_disr(void *__unused);
 
 #endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 4500a70c6a57..427c36bc5dd6 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -179,6 +179,7 @@
 #define SYS_AMAIR_EL1  sys_reg(3, 0, 10, 3, 0)
 
 #define SYS_VBAR_EL1   sys_reg(3, 0, 12, 0, 0)
+#define SYS_DISR_EL1   sys_reg(3, 0, 12, 1,  1)
 
 #define SYS_ICC_IAR0_EL1   sys_reg(3, 0, 12, 8, 0)
 #define SYS_ICC_EOIR0_EL1  sys_reg(3, 0, 12, 8, 1)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 626539995316..da8fd37c0984 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -911,6 +911,7 @@ static const struct arm64_cpu_capabilities arm64_features[] 
= {
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64PFR0_RAS_SHIFT,
.min_field_value = ID_AA64PFR0_RAS_V1,
+   .enable = cpu_clear_disr,
},
 #ifdef CONFIG_ARM64_IESB
{
@@ -1342,3 +1343,11 @@ int cpu_enable_iesb(void *__unused)
 
return 0;
 }
+
+int cpu_clear_disr(void *__unused)
+{
+   /* Firmware may have left a deferred SError in this register. */
+   write_sysreg_s(0, SYS_DISR_EL1);
+
+   return 0;
+}
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 95233dfc4c39..ac571223672d 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -124,6 +124,11 @@ ENTRY(cpu_do_resume)
ubfx    x11, x11, #1, #1
msr oslar_el1, x11

[PATCH v3 17/20] KVM: arm64: Save ESR_EL2 on guest SError

2017-10-05 Thread James Morse
When we exit a guest due to an SError the vcpu fault info isn't updated
with the ESR. Today this is only done for traps.

The v8.2 RAS Extensions define ISS values for SError. Update the vcpu's
fault_info with the ESR on SError so that handle_exit() can determine
if this was a RAS SError and decode its severity.

Signed-off-by: James Morse 
---
 arch/arm64/kvm/hyp/switch.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index af37658223a0..cba6d8ac105c 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -230,13 +230,20 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, 
u64 *hpfar)
return true;
 }
 
+static void __hyp_text __populate_fault_info_esr(struct kvm_vcpu *vcpu)
+{
+   vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);
+}
+
 static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
 {
-   u64 esr = read_sysreg_el2(esr);
-   u8 ec = ESR_ELx_EC(esr);
+   u8 ec;
+   u64 esr;
u64 hpfar, far;
 
-   vcpu->arch.fault.esr_el2 = esr;
+   __populate_fault_info_esr(vcpu);
+   esr = vcpu->arch.fault.esr_el2;
+   ec = ESR_ELx_EC(esr);
 
if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
return true;
@@ -325,6 +332,8 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 */
if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
goto again;
+   else if (ARM_EXCEPTION_CODE(exit_code) == ARM_EXCEPTION_EL1_SERROR)
+   __populate_fault_info_esr(vcpu);
 
if (static_branch_unlikely(&vgic_v2_cpuif_trap) &&
exit_code == ARM_EXCEPTION_TRAP) {
-- 
2.13.3



[PATCH v3 11/20] arm64: cpufeature: Detect CPU RAS Extensions

2017-10-05 Thread James Morse
From: Xie XiuQi 

ARM's v8.2 Extensions add support for Reliability, Availability and
Serviceability (RAS). On CPUs with these extensions system software
can use additional barriers to isolate errors and determine if faults
are pending.

Add cpufeature detection and a barrier in the context-switch code.
There is no need to use alternatives for this as CPUs that don't
support this feature will treat the instruction as a nop.

Platform level RAS support may require additional firmware support.
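
Users of the new capability can either rely on the hint encoding being a NOP
on older CPUs, or check the capability explicitly; a rough example (the DISR
accessors arrive later in this series):

    /* esb() encodes as 'hint #16', so it is safe on CPUs without RAS: */
    esb();

    /* RAS-only work should be gated on the new capability: */
    if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) {
        /* e.g. read and clear DISR_EL1, added by a later patch */
    }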

Signed-off-by: Xie XiuQi 
[Rebased, added esb and config option, reworded commit message]
Signed-off-by: James Morse 
---
 arch/arm64/Kconfig   | 16 
 arch/arm64/include/asm/barrier.h |  1 +
 arch/arm64/include/asm/cpucaps.h |  3 ++-
 arch/arm64/include/asm/sysreg.h  |  2 ++
 arch/arm64/kernel/cpufeature.c   | 13 +
 arch/arm64/kernel/process.c  |  3 +++
 6 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 70dfe4e9ccc5..b68f5e93baac 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -973,6 +973,22 @@ config ARM64_PMEM
  operations if DC CVAP is not supported (following the behaviour of
  DC CVAP itself if the system does not define a point of persistence).
 
+config ARM64_RAS_EXTN
+   bool "Enable support for RAS CPU Extensions"
+   default y
+   help
+ CPUs that support the Reliability, Availability and Serviceability
+ (RAS) Extensions, part of ARMv8.2 are able to track faults and
+ errors, classify them and report them to software.
+
+ On CPUs with these extensions system software can use additional
+ barriers to determine if faults are pending and read the
+ classification from a new set of registers.
+
+ Selecting this feature will allow the kernel to use these barriers
+ and access the new registers if the system supports the extension.
+ Platform RAS features may additionally depend on firmware support.
+
 endmenu
 
 config ARM64_MODULE_CMODEL_LARGE
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 0fe7e43b7fbc..8b0a0eb67625 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -30,6 +30,7 @@
 #define isb()  asm volatile("isb" : : : "memory")
 #define dmb(opt)   asm volatile("dmb " #opt : : : "memory")
 #define dsb(opt)   asm volatile("dsb " #opt : : : "memory")
+#define esb()  asm volatile("hint #16"  : : : "memory")
 
 #define mb()   dsb(sy)
 #define rmb()  dsb(ld)
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 8da621627d7c..4820d441bfb9 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -40,7 +40,8 @@
 #define ARM64_WORKAROUND_858921        19
 #define ARM64_WORKAROUND_CAVIUM_30115  20
 #define ARM64_HAS_DCPOP                21
+#define ARM64_HAS_RAS_EXTN             22
 
-#define ARM64_NCAPS                    22
+#define ARM64_NCAPS                    23
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index f707fed5886f..64e2a80fd749 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -332,6 +332,7 @@
 #define ID_AA64ISAR1_DPB_SHIFT 0
 
 /* id_aa64pfr0 */
+#define ID_AA64PFR0_RAS_SHIFT  28
 #define ID_AA64PFR0_GIC_SHIFT  24
 #define ID_AA64PFR0_ASIMD_SHIFT20
 #define ID_AA64PFR0_FP_SHIFT   16
@@ -340,6 +341,7 @@
 #define ID_AA64PFR0_EL1_SHIFT  4
 #define ID_AA64PFR0_EL0_SHIFT  0
 
+#define ID_AA64PFR0_RAS_V1 0x1
 #define ID_AA64PFR0_FP_NI  0xf
 #define ID_AA64PFR0_FP_SUPPORTED   0x0
 #define ID_AA64PFR0_ASIMD_NI   0xf
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index cd52d365d1f0..0fc017b55cb1 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -125,6 +125,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
+   ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 
ID_AA64PFR0_RAS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 
ID_AA64PFR0_GIC_SHIFT, 4, 0),
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 
ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 
ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
@@ -900,6 +901,18 @@ static const struct arm64_cpu_capabilities 
arm64_features[] = {
.min_field_value = 1,
},
 #endif
+#ifdef CONFIG_ARM64_RAS_EXTN
+   {
+   .desc = "RAS Extension Support",
+   

[PATCH v3 10/20] arm64: entry.S: move SError handling into a C function for future expansion

2017-10-05 Thread James Morse
From: Xie XiuQi 

Today SError is taken using the inv_entry macro that ends up in
bad_mode.

SError can be used by the RAS Extensions to notify either the OS or
firmware of CPU problems, some of which may have been corrected.

To allow this handling to be added, add a do_serror() C function
that just panic()s. Add the entry.S boilerplate to save/restore the
CPU registers and unmask debug exceptions. Future patches may change
do_serror() to return if the SError Interrupt was notification of a
corrected error.
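
For context, a rough sketch of how do_serror() could evolve once error
classification exists; this is an assumption about later patches, not code
added here, and the classification helper is hypothetical:

    asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
    {
        nmi_enter();

        /* A later patch can return here when the error was corrected: */
        if (serror_was_corrected(esr)) {        /* hypothetical helper */
            nmi_exit();
            return;
        }

        console_verbose();
        pr_crit("SError Interrupt on CPU%d, code 0x%08x -- %s\n",
                smp_processor_id(), esr, esr_get_class_string(esr));
        __show_regs(regs);
        panic("Asynchronous SError Interrupt");
    }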

Signed-off-by: Xie XiuQi 
Signed-off-by: Wang Xiongfeng 
[Split out of a bigger patch, added compat path, renamed, enabled debug
 exceptions]
Signed-off-by: James Morse 
---
 arch/arm64/Kconfig|  2 +-
 arch/arm64/kernel/entry.S | 36 +---
 arch/arm64/kernel/traps.c | 13 +
 3 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0df64a6a56d4..70dfe4e9ccc5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -98,7 +98,7 @@ config ARM64
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP if NUMA
-   select HAVE_NMI if ACPI_APEI_SEA
+   select HAVE_NMI
select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index df085ec003b0..e147c1d00b41 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -375,18 +375,18 @@ ENTRY(vectors)
kernel_ventry   el1_sync// Synchronous EL1h
kernel_ventry   el1_irq // IRQ EL1h
kernel_ventry   el1_fiq_invalid // FIQ EL1h
-   kernel_ventry   el1_error_invalid   // Error EL1h
+   kernel_ventry   el1_error   // Error EL1h
 
kernel_ventry   el0_sync// Synchronous 64-bit 
EL0
kernel_ventry   el0_irq // IRQ 64-bit EL0
kernel_ventry   el0_fiq_invalid // FIQ 64-bit EL0
-   kernel_ventry   el0_error_invalid   // Error 64-bit EL0
+   kernel_ventry   el0_error   // Error 64-bit EL0
 
 #ifdef CONFIG_COMPAT
kernel_ventry   el0_sync_compat // Synchronous 32-bit 
EL0
kernel_ventry   el0_irq_compat  // IRQ 32-bit EL0
kernel_ventry   el0_fiq_invalid_compat  // FIQ 32-bit EL0
-   kernel_ventry   el0_error_invalid_compat// Error 32-bit EL0
+   kernel_ventry   el0_error_compat// Error 32-bit EL0
 #else
kernel_ventry   el0_sync_invalid// Synchronous 32-bit 
EL0
kernel_ventry   el0_irq_invalid // IRQ 32-bit EL0
@@ -455,10 +455,6 @@ ENDPROC(el0_error_invalid)
 el0_fiq_invalid_compat:
inv_entry 0, BAD_FIQ, 32
 ENDPROC(el0_fiq_invalid_compat)
-
-el0_error_invalid_compat:
-   inv_entry 0, BAD_ERROR, 32
-ENDPROC(el0_error_invalid_compat)
 #endif
 
 el1_sync_invalid:
@@ -663,6 +659,10 @@ el0_svc_compat:
 el0_irq_compat:
kernel_entry 0, 32
b   el0_irq_naked
+
+el0_error_compat:
+   kernel_entry 0, 32
+   b   el0_error_naked
 #endif
 
 el0_da:
@@ -780,6 +780,28 @@ el0_irq_naked:
b   ret_to_user
 ENDPROC(el0_irq)
 
+el1_error:
+   kernel_entry 1
+   mrs x1, esr_el1
+   enable_dbg
+   mov x0, sp
+   bl  do_serror
+   kernel_exit 1
+ENDPROC(el1_error)
+
+el0_error:
+   kernel_entry 0
+el0_error_naked:
+   mrs x1, esr_el1
+   enable_dbg
+   mov x0, sp
+   bl  do_serror
+   enable_daif
+   ct_user_exit
+   b   ret_to_user
+ENDPROC(el0_error)
+
+
 /*
  * This is the fast syscall return path.  We do as little as possible here,
  * and this includes saving x0 back into the kernel stack.
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index d046495f67eb..210647d33bea 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -709,6 +709,19 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
 }
 #endif
 
+asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
+{
+   nmi_enter();
+
+   console_verbose();
+
+   pr_crit("SError Interrupt on CPU%d, code 0x%08x -- %s\n",
+   smp_processor_id(), esr, esr_get_class_string(esr));
+   __show_regs(regs);
+
+   panic("Asynchronous SError Interrupt");
+}
+
 void __pte_error(const char *file, int line, unsigned long val)
 {
pr_err("%s:%d: bad pte %016lx.\n", file, line, val);
-- 
2.13.3



[PATCH v3 15/20] KVM: arm64: Set an impdef ESR for Virtual-SError using VSESR_EL2.

2017-10-05 Thread James Morse
Prior to v8.2's RAS Extensions, the HCR_EL2.VSE 'virtual SError' feature
generated an SError with an implementation defined ESR_EL1.ISS, because we
had no mechanism to specify the ESR value.

On Juno this generates an all-zero ESR; the most significant bit, 'ISV',
is clear, indicating the remainder of the ISS field is invalid.

With the RAS Extensions we have a mechanism to specify this value, and the
most significant bit has a new meaning: 'IDS - Implementation Defined
Syndrome'. An all-zero SError ESR now means: 'RAS error: Uncategorized'
instead of 'no valid ISS'.

Add KVM support for the VSESR_EL2 register to specify an ESR value when
HCR_EL2.VSE generates a virtual SError. Change kvm_inject_vabt() to
specify an implementation-defined value.
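
The resulting flow, roughly (names from the hunks below, comments added for
illustration):

    /* Pend a virtual SError with a known, harmless syndrome: */
    pend_guest_serror(vcpu, ESR_ELx_ISV);   /* ISV/IDS set, rest of ISS zero */

    /* Later, in __activate_traps(), just before entering the guest: */
    if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (val & HCR_VSE))
        write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);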

We only need to restore the VSESR_EL2 value when HCR_EL2.VSE is set. KVM
saves/restores this bit during __deactivate_traps(), and hardware clears the
bit once the guest has consumed the virtual SError.

Future patches may add an API (or KVM CAP) to pend a virtual SError with
a specified ESR.

Cc: Dongjiu Geng 
Signed-off-by: James Morse 
---
 arch/arm64/include/asm/kvm_emulate.h |  5 +
 arch/arm64/include/asm/kvm_host.h|  3 +++
 arch/arm64/include/asm/sysreg.h  |  1 +
 arch/arm64/kvm/hyp/switch.c  |  4 
 arch/arm64/kvm/inject_fault.c| 13 -
 5 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h 
b/arch/arm64/include/asm/kvm_emulate.h
index e5df3fce0008..8a7a838eb17a 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -61,6 +61,11 @@ static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, 
unsigned long hcr)
vcpu->arch.hcr_el2 = hcr;
 }
 
+static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
+{
+   vcpu->arch.vsesr_el2 = vsesr;
+}
+
 static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
 {
return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index d3eb79a9ed6b..0af35e71fedb 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -277,6 +277,9 @@ struct kvm_vcpu_arch {
 
/* Detect first run of a vcpu */
bool has_run_once;
+
+   /* Virtual SError ESR to restore when HCR_EL2.VSE is set */
+   u64 vsesr_el2;
 };
 
 #define vcpu_gp_regs(v)(&(v)->arch.ctxt.gp_regs)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 427c36bc5dd6..a493e93de296 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -253,6 +253,7 @@
 
 #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0)
 #define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1)
+#define SYS_VSESR_EL2  sys_reg(3, 4, 5, 2, 3)
 #define SYS_FPEXC32_EL2sys_reg(3, 4, 5, 3, 0)
 
 #define __SYS__AP0Rx_EL2(x)sys_reg(3, 4, 12, 8, x)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 945e79c641c4..af37658223a0 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -86,6 +86,10 @@ static void __hyp_text __activate_traps(struct kvm_vcpu 
*vcpu)
isb();
}
write_sysreg(val, hcr_el2);
+
+   if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (val & HCR_VSE))
+   write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
+
/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
write_sysreg(1 << 15, hstr_el2);
/*
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index da6a8cfa54a0..52f7f66f1356 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -232,14 +232,25 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
inject_undef64(vcpu);
 }
 
+static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
+{
+   vcpu_set_vsesr(vcpu, esr);
+   vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VSE);
+}
+
 /**
  * kvm_inject_vabt - inject an async abort / SError into the guest
  * @vcpu: The VCPU to receive the exception
  *
  * It is assumed that this code is called from the VCPU thread and that the
  * VCPU therefore is not currently executing guest code.
+ *
+ * Systems with the RAS Extensions specify an imp-def ESR (ISV/IDS = 1) with
+ * the remaining ISS all-zeros so that this error is not interpreted as an
+ * uncategorized RAS error. Without the RAS Extensions we can't specify an ESR
+ * value, so the CPU generates an imp-def value.
  */
 void kvm_inject_vabt(struct kvm_vcpu *vcpu)
 {
-   vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VSE);
+   pend_guest_serror(vcpu, ESR_ELx_ISV);
 }
-- 
2.13.3



[PATCH v3 16/20] KVM: arm64: Save/Restore guest DISR_EL1

2017-10-05 Thread James Morse
If we deliver a virtual SError to the guest, the guest may defer it
with an ESB instruction. The guest reads the deferred value via DISR_EL1,
but the guest's view of DISR_EL1 is re-mapped to VDISR_EL2 when HCR_EL2.AMO
is set.

Add the KVM code to save/restore VDISR_EL2, and make it accessible to
userspace as DISR_EL1.

Signed-off-by: James Morse 
---
 arch/arm64/include/asm/kvm_host.h | 1 +
 arch/arm64/include/asm/sysreg.h   | 1 +
 arch/arm64/kvm/hyp/sysreg-sr.c| 6 ++
 arch/arm64/kvm/sys_regs.c | 1 +
 4 files changed, 9 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 0af35e71fedb..9564e6d56b79 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -120,6 +120,7 @@ enum vcpu_sysreg {
PAR_EL1,/* Physical Address Register */
MDSCR_EL1,  /* Monitor Debug System Control Register */
MDCCINT_EL1,/* Monitor Debug Comms Channel Interrupt Enable Reg */
+   DISR_EL1,   /* Deferred Interrupt Status Register */
 
/* Performance Monitors Registers */
PMCR_EL0,   /* Control Register */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index a493e93de296..1b8b9012234d 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -256,6 +256,7 @@
 #define SYS_VSESR_EL2  sys_reg(3, 4, 5, 2, 3)
 #define SYS_FPEXC32_EL2sys_reg(3, 4, 5, 3, 0)
 
+#define SYS_VDISR_EL2  sys_reg(3, 4, 12, 1,  1)
 #define __SYS__AP0Rx_EL2(x)sys_reg(3, 4, 12, 8, x)
 #define SYS_ICH_AP0R0_EL2  __SYS__AP0Rx_EL2(0)
 #define SYS_ICH_AP0R1_EL2  __SYS__AP0Rx_EL2(1)
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 934137647837..f4d604803b29 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -66,6 +66,9 @@ static void __hyp_text __sysreg_save_state(struct 
kvm_cpu_context *ctxt)
ctxt->gp_regs.sp_el1= read_sysreg(sp_el1);
ctxt->gp_regs.elr_el1   = read_sysreg_el1(elr);
ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
+
+   if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
+   ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);
 }
 
 static hyp_alternate_select(__sysreg_call_save_host_state,
@@ -119,6 +122,9 @@ static void __hyp_text __sysreg_restore_state(struct 
kvm_cpu_context *ctxt)
write_sysreg(ctxt->gp_regs.sp_el1,  sp_el1);
write_sysreg_el1(ctxt->gp_regs.elr_el1, elr);
write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
+
+   if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
+   write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
 }
 
 static hyp_alternate_select(__sysreg_call_restore_host_state,
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 2e070d3baf9f..713275b501ce 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -963,6 +963,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
 
{ SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 },
+   { SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
 
{ SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only },
{ SYS_DESC(SYS_ICC_EOIR0_EL1), read_from_write_only },
-- 
2.13.3



[PATCH v3 09/20] KVM: arm/arm64: mask/unmask daif around VHE guests

2017-10-05 Thread James Morse
Non-VHE systems take an exception to EL2 in order to world-switch into the
guest. When returning from the guest KVM implicitly restores the DAIF
flags when it returns to the kernel at EL1.

With VHE none of this exception-level jumping happens, so KVM's
world-switch code is exposed to the host kernel's DAIF values, and KVM
spills the guest-exit DAIF values back into the host kernel.
On entry to a guest we have Debug and SError exceptions unmasked; KVM
has switched VBAR but isn't prepared to handle these. On guest exit
Debug exceptions are left disabled once we return to the host and will
stay this way until we enter user space.

Add a helper to mask/unmask DAIF around VHE guests. The unmask can only
happen after the host's VBAR value has been synchronised by the isb in
__vhe_hyp_call (via kvm_call_hyp()). Masking could be as late as
setting KVM's VBAR value, but is kept here for symmetry.
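
The run loop then brackets the world-switch like this (a sketch of the arm.c
hunk below, with the reasoning as comments):

    if (has_vhe())
        kvm_arm_vhe_guest_enter();      /* mask DAIF before KVM switches VBAR */

    ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);

    /* The host's VBAR has been synchronised by the isb in __vhe_hyp_call: */
    if (has_vhe())
        kvm_arm_vhe_guest_exit();       /* back to DAIF_PROCCTX_NOIRQ */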

Signed-off-by: James Morse 

---
Give me a kick if you want this reworked as a fix (which will then
conflict with this series), or a backportable version.

 arch/arm64/include/asm/kvm_host.h | 10 ++
 virt/kvm/arm/arm.c|  4 
 2 files changed, 14 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index e923b58606e2..d3eb79a9ed6b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -384,4 +385,13 @@ static inline void __cpu_init_stage2(void)
  "PARange is %d bits, unsupported configuration!", parange);
 }
 
+static inline void kvm_arm_vhe_guest_enter(void)
+{
+   local_mask_daif();
+}
+
+static inline void kvm_arm_vhe_guest_exit(void)
+{
+   local_restore_daif(DAIF_PROCCTX_NOIRQ);
+}
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index b9f68e4add71..665529924b34 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -698,9 +698,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
 */
trace_kvm_entry(*vcpu_pc(vcpu));
guest_enter_irqoff();
+   if (has_vhe())
+   kvm_arm_vhe_guest_enter();
 
ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
 
+   if (has_vhe())
+   kvm_arm_vhe_guest_exit();
vcpu->mode = OUTSIDE_GUEST_MODE;
vcpu->stat.exits++;
/*
-- 
2.13.3



[PATCH v3 08/20] arm64: entry.S: convert elX_irq

2017-10-05 Thread James Morse
Following our 'dai' order, IRQs should be processed with debug and
SError exceptions unmasked.

Add a helper to unmask these two (and FIQ for good measure).

Signed-off-by: James Morse 
---
 arch/arm64/include/asm/assembler.h | 4 
 arch/arm64/kernel/entry.S  | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h 
b/arch/arm64/include/asm/assembler.h
index c2a37e2f733c..7ffb2a629dc9 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -54,6 +54,10 @@
msr daif, \tmp
.endm
 
+   .macro enable_da_f
+   msr daifclr, #(8 | 4 | 1)
+   .endm
+
 /*
  * Enable and disable interrupts.
  */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index f7dfe5d2b1fb..df085ec003b0 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -557,7 +557,7 @@ ENDPROC(el1_sync)
.align  6
 el1_irq:
kernel_entry 1
-   enable_dbg
+   enable_da_f
 #ifdef CONFIG_TRACE_IRQFLAGS
bl  trace_hardirqs_off
 #endif
@@ -766,7 +766,7 @@ ENDPROC(el0_sync)
 el0_irq:
kernel_entry 0
 el0_irq_naked:
-   enable_dbg
+   enable_da_f
 #ifdef CONFIG_TRACE_IRQFLAGS
bl  trace_hardirqs_off
 #endif
-- 
2.13.3



[PATCH v3 02/20] arm64: introduce an order for exceptions

2017-10-05 Thread James Morse
Currently SError is always masked in the kernel. To support RAS exceptions
using SError on hardware with the v8.2 RAS Extensions we need to unmask
SError as much as possible.

Let's define an order for masking and unmasking exceptions. 'dai' is
memorable and effectively what we have today.

Disabling debug exceptions should cause all other exceptions to be masked.
Masking SError should mask irq, but not disable debug exceptions.
Masking irqs has no side effects for other flags. Keeping to this order
makes it easier for entry.S to know which exceptions should be unmasked.

FIQ is never expected, but we mask it when we mask debug exceptions, and
unmask it at all other times.
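
In terms of the daifset/daifclr immediates (D=8, A=4, I=2, F=1) the order
works out roughly as below; this illustrates the rule rather than listing
helpers this patch adds:

    msr   daifset, #0xf           // disable debug: masks D, A, I and F
    msr   daifset, #0x6           // mask SError: masks A and I, debug stays enabled
    msr   daifset, #0x2           // mask IRQ: masks I only
    msr   daifclr, #(8 | 4 | 1)   // e.g. IRQ handling: D, A, F unmasked, I still masked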

Given masking debug exceptions masks everything, we don't need macros
to save/restore that bit independently. Remove them and switch the last
caller over to {en,dis}able_daif().

Signed-off-by: James Morse 
---
 arch/arm64/include/asm/irqflags.h  | 34 +-
 arch/arm64/kernel/debug-monitors.c |  5 +++--
 2 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/arch/arm64/include/asm/irqflags.h 
b/arch/arm64/include/asm/irqflags.h
index 8c581281fa12..9ecdca7011f0 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -21,6 +21,19 @@
 #include 
 
 /*
+ * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and
+ * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'dai'
+ * order:
+ * Masking debug exceptions causes all other exceptions to be masked too.
+ * Masking SError masks irq, but not debug exceptions. Masking irqs has no
+ * side effects for other flags. Keeping to this order makes it easier for
+ * entry.S to know which exceptions should be unmasked.
+ *
+ * FIQ is never expected, but we mask it when we disable debug exceptions, and
+ * unmask it at all other times.
+ */
+
+/*
  * CPU interrupt mask handling.
  */
 static inline unsigned long arch_local_irq_save(void)
@@ -89,26 +102,5 @@ static inline int arch_irqs_disabled_flags(unsigned long 
flags)
 {
return flags & PSR_I_BIT;
 }
-
-/*
- * save and restore debug state
- */
-#define local_dbg_save(flags)  \
-   do {\
-   typecheck(unsigned long, flags);\
-   asm volatile(   \
-   "mrs%0, daif// local_dbg_save\n"\
-   "msrdaifset, #8"\
-   : "=r" (flags) : : "memory");   \
-   } while (0)
-
-#define local_dbg_restore(flags)   \
-   do {\
-   typecheck(unsigned long, flags);\
-   asm volatile(   \
-   "msrdaif, %0// local_dbg_restore\n" \
-   : : "r" (flags) : "memory");\
-   } while (0)
-
 #endif
 #endif
diff --git a/arch/arm64/kernel/debug-monitors.c 
b/arch/arm64/kernel/debug-monitors.c
index c7ef99904934..bbfa4978b657 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -30,6 +30,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -46,9 +47,9 @@ u8 debug_monitors_arch(void)
 static void mdscr_write(u32 mdscr)
 {
unsigned long flags;
-   local_dbg_save(flags);
+   flags = local_mask_daif();
write_sysreg(mdscr, mdscr_el1);
-   local_dbg_restore(flags);
+   local_restore_daif(flags);
 }
 NOKPROBE_SYMBOL(mdscr_write);
 
-- 
2.13.3



[PATCH v3 03/20] arm64: Move the async/fiq helpers to explicitly set process context flags

2017-10-05 Thread James Morse
Remove the local_{async,fiq}_{en,dis}able macros as they don't respect
our newly defined order and are only used to set the flags for process
context when we bring CPUs online.

Add a helper to do this. The IRQ flag varies as we want it masked on
the boot CPU until we are ready to handle interrupts.
The boot CPU unmasks SError during early boot once it can print an error
message. If we can print an error message about SError, we can do the
same for FIQ. Debug exceptions are already enabled by __cpu_setup(),
which has also configured MDSCR_EL1 to disable MDE and KDE.

Signed-off-by: James Morse 
---
 arch/arm64/include/asm/daifflags.h | 4 +++-
 arch/arm64/include/asm/irqflags.h  | 6 --
 arch/arm64/kernel/setup.c  | 8 +---
 arch/arm64/kernel/smp.c| 3 +--
 4 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/daifflags.h 
b/arch/arm64/include/asm/daifflags.h
index fb40da8e1457..72389644c811 100644
--- a/arch/arm64/include/asm/daifflags.h
+++ b/arch/arm64/include/asm/daifflags.h
@@ -19,6 +19,9 @@
 #include 
 #include 
 
+#define DAIF_PROCCTX   0
+#define DAIF_PROCCTX_NOIRQ PSR_I_BIT
+
 /* Mask/unmask/restore all exceptions, including interrupts. */
 static inline unsigned long local_mask_daif(void)
 {
@@ -55,5 +58,4 @@ static inline void local_restore_daif(unsigned long flags)
if (arch_irqs_disabled_flags(flags))
trace_hardirqs_off();
 }
-
 #endif
diff --git a/arch/arm64/include/asm/irqflags.h 
b/arch/arm64/include/asm/irqflags.h
index 9ecdca7011f0..24692edf1a69 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -66,12 +66,6 @@ static inline void arch_local_irq_disable(void)
: "memory");
 }
 
-#define local_fiq_enable()     asm("msr    daifclr, #1" : : : "memory")
-#define local_fiq_disable()    asm("msr    daifset, #1" : : : "memory")
-
-#define local_async_enable()   asm("msr    daifclr, #4" : : : "memory")
-#define local_async_disable()  asm("msr    daifset, #4" : : : "memory")
-
 /*
  * Save the current interrupt enable state.
  */
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index d4b740538ad5..3a3f479d8fc8 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -48,6 +48,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -262,10 +263,11 @@ void __init setup_arch(char **cmdline_p)
parse_early_param();
 
/*
-*  Unmask asynchronous aborts after bringing up possible earlycon.
-* (Report possible System Errors once we can report this occurred)
+* Unmask asynchronous aborts and fiq after bringing up possible
+* earlycon. (Report possible System Errors once we can report this
+* occurred).
 */
-   local_async_enable();
+   local_restore_daif(DAIF_PROCCTX_NOIRQ);
 
/*
 * TTBR0 is only used for the identity mapping at this stage. Make it
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index c4116f2af43f..ced874565610 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -272,8 +272,7 @@ asmlinkage void secondary_start_kernel(void)
set_cpu_online(cpu, true);
complete(&cpu_running);
 
-   local_irq_enable();
-   local_async_enable();
+   local_restore_daif(DAIF_PROCCTX);
 
/*
 * OK, it's off to the idle thread for us
-- 
2.13.3



[PATCH v3 00/20] SError rework + RAS for firmware first support

2017-10-05 Thread James Morse
Hello,

The aim of this series is to enable IESB and add ESB-instructions to let us
kick any pending RAS errors into firmware to be handled by firmware-first.

Not all systems will have this firmware, so these RAS errors will become
pending SErrors. We should take these as quickly as possible and avoid
panic()ing for errors where we could have continued.

The first part of this series reworks the DAIF masking so that SError is
unmasked unless we are handling a debug exception.

The last part provides the same minimal handling for SErrors that interrupt
KVM. KVM is currently unable to handle SErrors during world-switch: unless
they occur during a magic single-instruction window, it hyp-panics. I suspect
this will be easier to fix once the VHE world-switch is further optimised.

KVM's kvm_inject_vabt() needs updating for v8.2 as now we can specify an ESR,
and all-zeros has a RAS meaning.

KVM's existing 'impdef SError to the guest' behaviour probably needs revisiting.
These are errors where we don't know what they mean, and they may not be
synchronised by ESB. Today we blame the guest.
My half-baked suggestion would be to make a virtual SError pending, but then
exit to user-space to give Qemu the chance to quit (for virtual machines that
don't generate SError), pend an SError with a new Qemu-specific ESR, or blindly
continue and take KVM's default all-zeros impdef ESR.

Known issues:
 * Synchronous external abort SET severity is not yet considered, all
   synchronous-external-aborts are still considered fatal.

 * KVM-Migration: VDISR_EL2 is exposed to userspace as DISR_EL1, but how should
   HCR_EL2.VSE or VSESR_EL2 be migrated when the guest has an SError pending but
   hasn't taken it yet...?

 * No HCR_EL2.{TEA/TERR} setting ... Dongjiu Geng had a patch that was almost
   finished, I haven't seen the new version.

 * KVM unmasks SError and IRQ before calling handle_exit, so we may be rescheduled
   while holding an uncontained ESR... (this is currently an improvement on
   assuming it's an impdef error we can blame on the guest)
* We need to fix this for APEI's SEI or kernel-first RAS: the guest-exit
  SError handling will need to move to before kvm_arm_vhe_guest_exit().


Changes from v2 ... (where do I start?)
 * All the KVM patches rewritten.
 * VSESR_EL2 setting/save/restore is new, as is
 * save/restoring VDISR_EL2 and exposing it to user space as DISR_EL1.

 * The new ARM-ARM (DDI0487B.b) has an SCTLR_EL2.IESB even for !VHE, we turn
   that on.
 * 'survivable' SErrors are now described as 'blocking' because the CPU can't
   make progress; this makes all the commit messages clearer.
 * My IESB!=ESB confusion got fixed, so the crazy eret with SError unmasked
   is gone, never to return.
 * The cost of masking SError on return to user-space has been wrapped up with
   the ret-to-user loop. (This was only visible with microbenchmarks like
   getpid)
 * entry.S changes got cleaner, commit messages got better.


This series can be retrieved from:
git://linux-arm.org/linux-jm.git -b serror_rework/v3


Comments and contradictions welcome,

James Morse (18):
  arm64: explicitly mask all exceptions
  arm64: introduce an order for exceptions
  arm64: Move the async/fiq helpers to explicitly set process context
flags
  arm64: Mask all exceptions during kernel_exit
  arm64: entry.S: Remove disable_dbg
  arm64: entry.S: convert el1_sync
  arm64: entry.S convert el0_sync
  arm64: entry.S: convert elX_irq
  KVM: arm/arm64: mask/unmask daif around VHE guests
  arm64: kernel: Survive corrected RAS errors notified by SError
  arm64: cpufeature: Enable IESB on exception entry/return for
firmware-first
  arm64: kernel: Prepare for a DISR user
  KVM: arm64: Set an impdef ESR for Virtual-SError using VSESR_EL2.
  KVM: arm64: Save/Restore guest DISR_EL1
  KVM: arm64: Save ESR_EL2 on guest SError
  KVM: arm64: Handle RAS SErrors from EL1 on guest exit
  KVM: arm64: Handle RAS SErrors from EL2 on guest exit
  KVM: arm64: Take any host SError before entering the guest

Xie XiuQi (2):
  arm64: entry.S: move SError handling into a C function for future
expansion
   arm64: cpufeature: Detect CPU RAS Extensions

 arch/arm64/Kconfig   | 33 +-
 arch/arm64/include/asm/assembler.h   | 50 ++---
 arch/arm64/include/asm/barrier.h |  1 +
 arch/arm64/include/asm/cpucaps.h |  4 +-
 arch/arm64/include/asm/daifflags.h   | 61 +
 arch/arm64/include/asm/esr.h | 17 +++
 arch/arm64/include/asm/exception.h   | 14 ++
 arch/arm64/include/asm/irqflags.h| 40 ++---
 arch/arm64/include/asm/kvm_emulate.h | 10 +
 arch/arm64/include/asm/kvm_host.h| 16 +++
 arch/arm64/include/asm/processor.h   |  2 +
 arch/arm64/include/asm/sysreg.h  |  6 +++
 arch/arm64/include/asm/traps.h   | 36 +++
 arch/arm64/kernel/asm-offsets.c  |  1 +
 arch/arm64/kernel/cpufeature.c   | 43 ++
 

[PATCH v3 07/20] arm64: entry.S convert el0_sync

2017-10-05 Thread James Morse
el0_sync also unmasks exceptions on a case-by-case basis: debug exceptions
are enabled unless this was a debug exception, and IRQs are unmasked for
some exception types but not for others.

el0_dbg should run with everything masked to prevent us taking a debug
exception from do_debug_exception. For the other cases we can unmask
everything. This changes the behaviour of fpsimd_{acc,exc} and el0_inv,
which previously ran with IRQs masked.

This patch removes the last user of enable_dbg_and_irq, so remove the
macro as well.

Signed-off-by: James Morse 
---
 arch/arm64/include/asm/assembler.h |  9 -
 arch/arm64/kernel/entry.S  | 24 ++--
 2 files changed, 10 insertions(+), 23 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h 
b/arch/arm64/include/asm/assembler.h
index abb5abd61ddb..c2a37e2f733c 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -97,15 +97,6 @@
.endm
 
 /*
- * Enable both debug exceptions and interrupts. This is likely to be
- * faster than two daifclr operations, since writes to this register
- * are self-synchronising.
- */
-   .macro  enable_dbg_and_irq
-   msr daifclr, #(8 | 2)
-   .endm
-
-/*
  * SMP data memory barrier
  */
.macro  smp_dmb, opt
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index bd54115972a4..f7dfe5d2b1fb 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -670,8 +670,7 @@ el0_da:
 * Data abort handling
 */
mrs x26, far_el1
-   // enable interrupts before calling the main handler
-   enable_dbg_and_irq
+   enable_daif
ct_user_exit
clear_address_tag x0, x26
mov x1, x25
@@ -683,8 +682,7 @@ el0_ia:
 * Instruction abort handling
 */
mrs x26, far_el1
-   // enable interrupts before calling the main handler
-   enable_dbg_and_irq
+   enable_daif
ct_user_exit
mov x0, x26
mov x1, x25
@@ -695,7 +693,7 @@ el0_fpsimd_acc:
/*
 * Floating Point or Advanced SIMD access
 */
-   enable_dbg
+   enable_daif
ct_user_exit
mov x0, x25
mov x1, sp
@@ -705,7 +703,7 @@ el0_fpsimd_exc:
/*
 * Floating Point or Advanced SIMD exception
 */
-   enable_dbg
+   enable_daif
ct_user_exit
mov x0, x25
mov x1, sp
@@ -716,8 +714,7 @@ el0_sp_pc:
 * Stack or PC alignment exception handling
 */
mrs x26, far_el1
-   // enable interrupts before calling the main handler
-   enable_dbg_and_irq
+   enable_daif
ct_user_exit
mov x0, x26
mov x1, x25
@@ -728,8 +725,7 @@ el0_undef:
/*
 * Undefined instruction
 */
-   // enable interrupts before calling the main handler
-   enable_dbg_and_irq
+   enable_daif
ct_user_exit
mov x0, sp
bl  do_undefinstr
@@ -738,7 +734,7 @@ el0_sys:
/*
 * System instructions, for trapped cache maintenance instructions
 */
-   enable_dbg_and_irq
+   enable_daif
ct_user_exit
mov x0, x25
mov x1, sp
@@ -753,11 +749,11 @@ el0_dbg:
mov x1, x25
mov x2, sp
bl  do_debug_exception
-   enable_dbg
+   enable_daif
ct_user_exit
b   ret_to_user
 el0_inv:
-   enable_dbg
+   enable_daif
ct_user_exit
mov x0, sp
mov x1, #BAD_SYNC
@@ -836,7 +832,7 @@ el0_svc:
mov wsc_nr, #__NR_syscalls
 el0_svc_naked: // compat entry point
stp x0, xscno, [sp, #S_ORIG_X0] // save the original x0 and 
syscall number
-   enable_dbg_and_irq
+   enable_daif
ct_user_exit 1
 
ldr x16, [tsk, #TSK_TI_FLAGS]   // check for syscall hooks
-- 
2.13.3



[PATCH v3 06/20] arm64: entry.S: convert el1_sync

2017-10-05 Thread James Morse
el1_sync unmasks exceptions on a case-by-case basis: debug exceptions
are unmasked unless this was a debug exception, and IRQs are unmasked
for instruction and data aborts only if the interrupted context had
IRQs unmasked.

Following our 'dai' order, el1_dbg should run with everything masked.
For the other cases we can inherit whatever we interrupted.

Add a macro inherit_daif to set daif based on the interrupted pstate.

Signed-off-by: James Morse 
---
 arch/arm64/include/asm/assembler.h |  6 ++
 arch/arm64/kernel/entry.S  | 12 
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h 
b/arch/arm64/include/asm/assembler.h
index 1b0972046a72..abb5abd61ddb 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -48,6 +48,12 @@
msr daif, \flags
.endm
 
+   /* Only on aarch64 pstate, PSR_D_BIT is different for aarch32 */
+   .macro  inherit_daif, pstate:req, tmp:req
+   and \tmp, \pstate, #(PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
+   msr daif, \tmp
+   .endm
+
 /*
  * Enable and disable interrupts.
  */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index f7d7bf9d76e7..bd54115972a4 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -510,11 +510,7 @@ el1_da:
 * Data abort handling
 */
mrs x3, far_el1
-   enable_dbg
-   // re-enable interrupts if they were enabled in the aborted context
-   tbnz    x23, #7, 1f     // PSR_I_BIT
-   enable_irq
-1:
+   inherit_daif    pstate=x23, tmp=x2
clear_address_tag x0, x3
mov x2, sp  // struct pt_regs
bl  do_mem_abort
@@ -525,7 +521,7 @@ el1_sp_pc:
 * Stack or PC alignment exception handling
 */
mrs x0, far_el1
-   enable_dbg
+   inherit_daif    pstate=x23, tmp=x2
mov x2, sp
bl  do_sp_pc_abort
ASM_BUG()
@@ -533,7 +529,7 @@ el1_undef:
/*
 * Undefined instruction
 */
-   enable_dbg
+   inherit_daif    pstate=x23, tmp=x2
mov x0, sp
bl  do_undefinstr
ASM_BUG()
@@ -550,7 +546,7 @@ el1_dbg:
kernel_exit 1
 el1_inv:
// TODO: add support for undefined instructions in kernel mode
-   enable_dbg
+   inherit_daif    pstate=x23, tmp=x2
mov x0, sp
mov x2, x1
mov x1, #BAD_SYNC
-- 
2.13.3



[PATCH v3 04/20] arm64: Mask all exceptions during kernel_exit

2017-10-05 Thread James Morse
To take RAS Exceptions as quickly as possible we need to keep SError
unmasked as much as possible. We need to mask it during kernel_exit
as taking an error from this code will overwrite the exception-registers.

Adding a naked 'disable_daif' to kernel_exit causes a performance problem
for micro-benchmarks that do no real work (e.g. calling getpid() in a
loop). This is because the ret_to_user loop has already masked IRQs so
that the TIF_WORK_MASK thread flags can't change underneath it; adding
disable_daif is an additional self-synchronising operation.

In the future, the RAS APEI code may need to modify the TIF_WORK_MASK
flags from an SError, in which case the ret_to_user loop must mask SError
while it examines the flags.

Disable all exceptions for return to EL1. For return to EL0, get the
ret_to_user loop to leave all exceptions masked once it has done its
work; this avoids an extra pstate write.
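
In C terms, the do_notify_resume() loop below ends up behaving like this
sketch (comments added; the masking helpers come from earlier patches):

    do {
        if (thread_flags & _TIF_NEED_RESCHED) {
            /* Debug and SError stay unmasked for the next task */
            local_restore_daif(DAIF_PROCCTX_NOIRQ);
            schedule();
        } else {
            local_restore_daif(DAIF_PROCCTX);   /* everything unmasked */
            /* ... signals, uprobes, foreign fpstate ... */
        }

        local_mask_daif();      /* re-mask before re-reading the flags */
        thread_flags = READ_ONCE(current_thread_info()->flags);
    } while (thread_flags & _TIF_WORK_MASK);
    /* fall through to kernel_exit with everything masked: no extra pstate write */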

Signed-off-by: James Morse 
---
 arch/arm64/kernel/entry.S  | 10 +-
 arch/arm64/kernel/signal.c |  8 ++--
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index e1c59d4008a8..f7d7bf9d76e7 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -221,6 +221,8 @@ alternative_else_nop_endif
 
.macro  kernel_exit, el
.if \el != 0
+   disable_daif
+
/* Restore the task's original addr_limit. */
ldr x20, [sp, #S_ORIG_ADDR_LIMIT]
str x20, [tsk, #TSK_TI_ADDR_LIMIT]
@@ -517,8 +519,6 @@ el1_da:
mov x2, sp  // struct pt_regs
bl  do_mem_abort
 
-   // disable interrupts before pulling preserved data off the stack
-   disable_irq
kernel_exit 1
 el1_sp_pc:
/*
@@ -793,7 +793,7 @@ ENDPROC(el0_irq)
  * and this includes saving x0 back into the kernel stack.
  */
 ret_fast_syscall:
-   disable_irq // disable interrupts
+   disable_daif
str x0, [sp, #S_X0] // returned x0
ldr x1, [tsk, #TSK_TI_FLAGS]// re-check for syscall tracing
and x2, x1, #_TIF_SYSCALL_WORK
@@ -803,7 +803,7 @@ ret_fast_syscall:
enable_step_tsk x1, x2
kernel_exit 0
 ret_fast_syscall_trace:
-   enable_irq  // enable interrupts
+   enable_daif
b   __sys_trace_return_skipped  // we already saved x0
 
 /*
@@ -821,7 +821,7 @@ work_pending:
  * "slow" syscall return path.
  */
 ret_to_user:
-   disable_irq // disable interrupts
+   disable_daif
ldr x1, [tsk, #TSK_TI_FLAGS]
and x2, x1, #_TIF_WORK_MASK
cbnz    x2, work_pending
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 0bdc96c61bc0..b1fd14beb044 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -756,9 +757,12 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
addr_limit_user_check();
 
if (thread_flags & _TIF_NEED_RESCHED) {
+   /* Unmask Debug and SError for the next task */
+   local_restore_daif(DAIF_PROCCTX_NOIRQ);
+
schedule();
} else {
-   local_irq_enable();
+   local_restore_daif(DAIF_PROCCTX);
 
if (thread_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
@@ -775,7 +779,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
fpsimd_restore_current_state();
}
 
-   local_irq_disable();
+   local_mask_daif();
thread_flags = READ_ONCE(current_thread_info()->flags);
} while (thread_flags & _TIF_WORK_MASK);
 }
-- 
2.13.3



[PATCH v3 01/20] arm64: explicitly mask all exceptions

2017-10-05 Thread James Morse
There are a few places where we want to mask all exceptions. Today we
do this in a piecemeal fashion: typically we expect the caller to
have masked IRQs and the arch code masks debug exceptions, ignoring
SError, which is probably masked.

Make it clear that 'mask all exceptions' is the intention by adding
helpers to do exactly that.
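
Typical usage of the new helpers (a sketch; the hibernate and debug-monitors
conversions later in the series follow this pattern):

    unsigned long flags;

    flags = local_mask_daif();      /* mask Debug, SError, IRQ and FIQ */
    /* ... code that must not be interrupted by any exception ... */
    local_restore_daif(flags);      /* put the previous DAIF bits back */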

This will let us unmask SError without having to add 'oh and SError'
to these paths.

Signed-off-by: James Morse 

---
Remove the disable IRQs comment above cpu_die(): we return from idle via
cpu_resume. This comment is confusing once the local_irq_disable() call
is changed.
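
For reference, the intended calling pattern with the new helpers is
simply (sketch only, mirroring the hibernate.c hunk below):

	unsigned long flags;

	flags = local_mask_daif();	/* mask Debug, SError, IRQ and FIQ */

	/* ... code that must not take any exception ... */

	local_restore_daif(flags);	/* put the saved DAIF bits back */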

 arch/arm64/include/asm/assembler.h | 17 +++
 arch/arm64/include/asm/daifflags.h | 59 ++
 arch/arm64/kernel/hibernate.c  |  5 ++--
 arch/arm64/kernel/machine_kexec.c  |  4 +--
 arch/arm64/kernel/smp.c|  9 ++
 arch/arm64/kernel/suspend.c|  7 +++--
 arch/arm64/kernel/traps.c  |  3 +-
 arch/arm64/mm/proc.S   |  9 +++---
 8 files changed, 94 insertions(+), 19 deletions(-)
 create mode 100644 arch/arm64/include/asm/daifflags.h

diff --git a/arch/arm64/include/asm/assembler.h 
b/arch/arm64/include/asm/assembler.h
index d58a6253c6ab..114e741ca873 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -31,6 +31,23 @@
 #include 
 #include 
 
+   .macro save_and_disable_daif, flags
+   mrs \flags, daif
+   msr daifset, #0xf
+   .endm
+
+   .macro disable_daif
+   msr daifset, #0xf
+   .endm
+
+   .macro enable_daif
+   msr daifclr, #0xf
+   .endm
+
+   .macro  restore_daif, flags:req
+   msr daif, \flags
+   .endm
+
 /*
  * Enable and disable interrupts.
  */
diff --git a/arch/arm64/include/asm/daifflags.h 
b/arch/arm64/include/asm/daifflags.h
new file mode 100644
index ..fb40da8e1457
--- /dev/null
+++ b/arch/arm64/include/asm/daifflags.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2017 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see .
+ */
+#ifndef __ASM_DAIFFLAGS_H
+#define __ASM_DAIFFLAGS_H
+
+#include 
+#include 
+
+/* Mask/unmask/restore all exceptions, including interrupts. */
+static inline unsigned long local_mask_daif(void)
+{
+   unsigned long flags;
+   asm volatile(
+   "mrs%0, daif// local_mask_daif\n"
+   "msrdaifset, #0xf"
+   : "=r" (flags)
+   :
+   : "memory");
+   trace_hardirqs_off();
+   return flags;
+}
+
+static inline void local_unmask_daif(void)
+{
+   trace_hardirqs_on();
+   asm volatile(
+   "msrdaifclr, #0xf   // local_unmask_daif"
+   :
+   :
+   : "memory");
+}
+
+static inline void local_restore_daif(unsigned long flags)
+{
+   if (!arch_irqs_disabled_flags(flags))
+   trace_hardirqs_on();
+   asm volatile(
+   "msrdaif, %0// local_restore_daif"
+   :
+   : "r" (flags)
+   : "memory");
+   if (arch_irqs_disabled_flags(flags))
+   trace_hardirqs_off();
+}
+
+#endif
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 095d3c170f5d..0b95aab3b080 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -285,7 +286,7 @@ int swsusp_arch_suspend(void)
return -EBUSY;
}
 
-   local_dbg_save(flags);
+   flags = local_mask_daif();
 
if (__cpu_suspend_enter()) {
/* make the crash dump kernel image visible/saveable */
@@ -315,7 +316,7 @@ int swsusp_arch_suspend(void)
__cpu_suspend_exit();
}
 
-   local_dbg_restore(flags);
+   local_restore_daif(flags);
 
return ret;
 }
diff --git a/arch/arm64/kernel/machine_kexec.c 
b/arch/arm64/kernel/machine_kexec.c
index 11121f608eb5..7efcb0dcc61b 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -18,6 +18,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -195,8 +196,7 @@ void machine_kexec(struct kimage *kimage)
 
pr_info("Bye!\n");
 
-   /* Disable all DAIF exceptions. */
-   asm volatile ("msr daifset, #0xf" : : : "memory");

[PATCH v3 05/20] arm64: entry.S: Remove disable_dbg

2017-10-05 Thread James Morse
enable_step_tsk is the only user of disable_dbg, which doesn't respect
our 'dai' order for exception masking. enable_step_tsk may enable
single-step, so it previously needed to mask debug exceptions to prevent
us from single-stepping kernel_exit. enable_step_tsk is called at the end
of the ret_to_user loop, which has already masked all exceptions, so this
is no longer needed.

Remove disable_dbg and add a comment that enable_step_tsk's caller should
have masked debug.

Signed-off-by: James Morse 
---
 arch/arm64/include/asm/assembler.h | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h 
b/arch/arm64/include/asm/assembler.h
index 114e741ca873..1b0972046a72 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -68,13 +68,6 @@
msr daif, \flags
.endm
 
-/*
- * Enable and disable debug exceptions.
- */
-   .macro  disable_dbg
-   msr daifset, #8
-   .endm
-
.macro  enable_dbg
msr daifclr, #8
.endm
@@ -88,9 +81,9 @@
 9990:
.endm
 
+   /* call with daif masked */
.macro  enable_step_tsk, flgs, tmp
tbz \flgs, #TIF_SINGLESTEP, 9990f
-   disable_dbg
mrs \tmp, mdscr_el1
orr \tmp, \tmp, #1
msr mdscr_el1, \tmp
-- 
2.13.3

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length

2017-10-05 Thread Dave Martin
On Thu, Oct 05, 2017 at 05:53:34PM +0100, Catalin Marinas wrote:
> On Thu, Oct 05, 2017 at 05:42:29PM +0100, Dave P Martin wrote:
> > On Wed, Sep 13, 2017 at 03:11:23PM -0700, Catalin Marinas wrote:
> > > On Wed, Sep 13, 2017 at 08:06:12PM +0100, Dave P Martin wrote:
> > > > On Wed, Sep 13, 2017 at 10:29:11AM -0700, Catalin Marinas wrote:

[...]

> > > > > Can this be merged with patch 20? It seems to add the PR_ definitions
> > > > > which get actually used later when the prctl interface is added.
> > > > 
> > > > This patch is used both by patch 19 and by patch 20, which I preferred
> > > > not to merge with each other: ptrace and prctl are significantly
> > > > different things.
> > > > 
> > > > The prctl bit definitions are added here because they are the canonical
> > > > definitions used by both interfaces.  The ptrace #defines are based on
> > > > them.
> > > > 
> > > > Does it make sense if I merge patch 20 into this one and apply patch 19
> > > > on top?  This avoids the appearance of prctl #defines with no prctl
> > > > implementation.
> > > 
> > > That's fine, you can bring patch 20 forward. If there are other
> > > non-trivial issues, feel free to ignore my comment.
> > 
> > I've had a go at this, but I think it's going to be more trouble than
> > it's worth -- there are other interdependencies between the patches
> > which make them tricky to reorder.
> > 
> > I could add a note in the commit message for this patch explaining why
> > the prctl flag #defines are being added here.  What do you think?
> 
> As I said, it's up to you. A line in the commit message would do.

OK, I think I'll stick with this then.

Cheers
---Dave
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 26/28] arm64/sve: Add documentation

2017-10-05 Thread Dave Martin
On Thu, Oct 05, 2017 at 05:39:24PM +0100, Szabolcs Nagy wrote:
> On 31/08/17 18:00, Dave Martin wrote:

[...]

> > +   PR_SVE_SET_VL_ONEXEC
> > +
> > +   Defer the requested vector length change until the next execve().
> > +   This allows launching of a new program with a different vector
> > +   length, while avoiding runtime side effects in the caller.
> > +
> > +   This also overrides the effect of PR_SVE_SET_VL_INHERIT for the
> > +   first execve().
> > +
> > +   Without PR_SVE_SET_VL_ONEXEC, any outstanding deferred vector
> > +   length change is cancelled.
> > +
> 
> based on later text it seems this works if execve is
> called in the same thread as prctl was called in.
> 
> this is a bit weird from user-space pov so it may
> make sense to state it here explicitly.

True.  Looking at the prctl(2) man page it looks like other per-thread
properties are inherited across execve() in a similar way, but it's at
least worth a mention.  PR_SET_SECCOMP seems to work like this, for
example.

So, the intention is that you do a prctl(...ONEXEC) in the run up to
execve(), rather than doing it at other random times.  The primary
reason for ONEXEC is to avoid the side-effects of actually changing
the VL.


Looking at this though...
I wonder whether PR_SVE_SET_VL(... PR_SVE_SET_VL_ONEXEC) should return
the VL set for exec, rather than the current VL (which is unchanged by
definition in this case, thus uninteresting).

This would allow the ONEXEC flag to be used to probe for available VLs
without the other side-effects of changing VL, something like:

int old = prctl(PR_SVE_GET_VL);
int ret;

ret = prctl(PR_SVE_SET_VL, 144 | PR_SVE_SET_VL_ONEXEC);
if (ret == -1) {
perror("PR_SVE_SET_VL");
goto error;
}

if ((ret & PR_SVE_VL_LEN_MASK) == 144)
have_vl_144 = true;

if (prctl(PR_SVE_SET_VL, old | PR_SVE_SET_VL_ONEXEC) == -1) {
perror("PR_SVE_SET_VL");
goto error;
}


This does _not_ do the expected thing right now, since there's no
direct way to retrieve thread.sve_vl_onexec directly from the kernel
(and it didn't really seem justified to add one).

Thoughts?

Cheers
---Dave
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length

2017-10-05 Thread Catalin Marinas
On Thu, Oct 05, 2017 at 05:42:29PM +0100, Dave P Martin wrote:
> On Wed, Sep 13, 2017 at 03:11:23PM -0700, Catalin Marinas wrote:
> > On Wed, Sep 13, 2017 at 08:06:12PM +0100, Dave P Martin wrote:
> > > On Wed, Sep 13, 2017 at 10:29:11AM -0700, Catalin Marinas wrote:
> > > > On Thu, Aug 31, 2017 at 06:00:46PM +0100, Dave P Martin wrote:
> > > > > This patch implements the core logic for changing a task's vector
> > > > > length on request from userspace.  This will be used by the ptrace
> > > > > and prctl frontends that are implemented in later patches.
> > > > > 
> > > > > The SVE architecture permits, but does not require, implementations
> > > > > to support vector lengths that are not a power of two.  To handle
> > > > > this, logic is added to check a requested vector length against a
> > > > > possibly sparse bitmap of available vector lengths at runtime, so
> > > > > that the best supported value can be chosen.
> > > > > 
> > > > > Signed-off-by: Dave Martin 
> > > > > Cc: Alex Bennée 
> > > > 
> > > > Can this be merged with patch 20? It seems to add the PR_ definitions
> > > > which get actually used later when the prctl interface is added.
> > > 
> > > This patch is used both by patch 19 and by patch 20, which I preferred
> > > not to merge with each other: ptrace and prctl are significantly
> > > different things.
> > > 
> > > The prctl bit definitions are added here because they are the canonical
> > > definitions used by both interfaces.  The ptrace #defines are based on
> > > them.
> > > 
> > > Does it make sense if I merge patch 20 into this one and apply patch 19
> > on top?  This avoids the appearance of prctl #defines with no prctl
> > > implementation.
> > 
> > That's fine, you can bring patch 20 forward. If there are other
> > non-trivial issues, feel free to ignore my comment.
> 
> I've had a go at this, but I think it's going to be more trouble than
> it's worth -- there are other interdependencies between the patches
> which make them tricky to reorder.
> 
> I could add a note in the commit message for this patch explaining why
> the prctl flag #defines are being added here.  What do you think?

As I said, it's up to you. A line in the commit message would do.

-- 
Catalin
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 14/28] arm64/sve: Backend logic for setting the vector length

2017-10-05 Thread Dave Martin
On Wed, Sep 13, 2017 at 03:11:23PM -0700, Catalin Marinas wrote:
> On Wed, Sep 13, 2017 at 08:06:12PM +0100, Dave P Martin wrote:
> > On Wed, Sep 13, 2017 at 10:29:11AM -0700, Catalin Marinas wrote:
> > > On Thu, Aug 31, 2017 at 06:00:46PM +0100, Dave P Martin wrote:
> > > > This patch implements the core logic for changing a task's vector
> > > > length on request from userspace.  This will be used by the ptrace
> > > > and prctl frontends that are implemented in later patches.
> > > > 
> > > > The SVE architecture permits, but does not require, implementations
> > > > to support vector lengths that are not a power of two.  To handle
> > > > this, logic is added to check a requested vector length against a
> > > > possibly sparse bitmap of available vector lengths at runtime, so
> > > > that the best supported value can be chosen.
> > > > 
> > > > Signed-off-by: Dave Martin 
> > > > Cc: Alex Bennée 
> > > 
> > > Can this be merged with patch 20? It seems to add the PR_ definitions
> > > which get actually used later when the prctl interface is added.
> > 
> > This patch is used both by patch 19 and by patch 20, which I preferred
> > not to merge with each other: ptrace and prctl are significantly
> > different things.
> > 
> > The prctl bit definitions are added here because they are the canonical
> > definitions used by both interfaces.  The ptrace #defines are based on
> > them.
> > 
> > Does it make sense if I merge patch 20 into this one and apply patch 19
> > on top?  This avoids the appearance of prctl #defines with no prctl
> > implementation.
> 
> That's fine, you can bring patch 20 forward. If there are other
> non-trivial issues, feel free to ignore my comment.

I've had a go at this, but I think it's going to be more trouble than
it's worth -- there are other interdependencies between the patches
which make them tricky to reorder.

I could add a note in the commit message for this patch explaining why
the prctl flag #defines are being added here.  What do you think?

Cheers
---Dave
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 26/28] arm64/sve: Add documentation

2017-10-05 Thread Szabolcs Nagy
On 31/08/17 18:00, Dave Martin wrote:
> +prctl(PR_SVE_SET_VL, unsigned long arg)
> +
> +Sets the vector length of the calling thread and related flags, where
> +arg == vl | flags.
> +
> +vl is the desired vector length, where sve_vl_valid(vl) must be true.
> +
> +flags:
> +
> + PR_SVE_SET_VL_INHERIT
> +
> + Inherit the current vector length across execve().  Otherwise, the
> + vector length is reset to the system default at execve().  (See
> + Section 9.)
> +
> + PR_SVE_SET_VL_ONEXEC
> +
> + Defer the requested vector length change until the next execve().
> + This allows launching of a new program with a different vector
> + length, while avoiding runtime side effects in the caller.
> +
> + This also overrides the effect of PR_SVE_SET_VL_INHERIT for the
> + first execve().
> +
> + Without PR_SVE_SET_VL_ONEXEC, any outstanding deferred vector
> + length change is cancelled.
> +

based on later text it seems this works if execve is
called in the same thread as prctl was called in.

this is a bit weird from user-space pov so it may
make sense to state it here explicitly.

> +Return value: a nonnegative on success, or a negative value on error:
> + EINVAL: SVE not supported, invalid vector length requested, or
> + invalid flags.
> +
> +On success, the calling thread's vector length is changed to the largest
> +value supported by the system that is less than or equal to vl.
> +If vl == SVE_VL_MAX, the calling thread's vector length is changed to the
> +largest value supported by the system.
> +
> +The returned value describes the resulting configuration, encoded as for
> +PR_SVE_GET_VL.
> +
> +Changing the vector length causes all of P0..P15, FFR and all bits of
> +Z0..V31 except for Z0 bits [127:0] .. Z31 bits [127:0] to become
> +unspecified.  Calling PR_SVE_SET_VL with vl equal to the thread's current
> +vector length does not constitute a change to the vector length for this
> +purpose.

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 11/28] arm64/sve: Core task context handling

2017-10-05 Thread Catalin Marinas
On Tue, Oct 03, 2017 at 12:33:03PM +0100, Dave P Martin wrote:
> On Wed, Sep 13, 2017 at 07:33:25AM -0700, Catalin Marinas wrote:
> > On Thu, Aug 31, 2017 at 06:00:43PM +0100, Dave P Martin wrote:
> > > +/*
> > > + * Handle SVE state across fork():
> > > + *
> > > + * dst and src must not end up with aliases of the same sve_state.
> > > + * Because a task cannot fork except in a syscall, we can discard SVE
> > > + * state for dst here, so long as we take care to retain the FPSIMD
> > > + * subset of the state if SVE is in use.  Reallocation of the SVE state
> > > + * will be deferred until dst tries to use SVE.
> > > + */
> > > +void fpsimd_dup_sve(struct task_struct *dst, struct task_struct const 
> > > *src)
> > > +{
> > > + if (test_and_clear_tsk_thread_flag(dst, TIF_SVE)) {
> > > + WARN_ON(dst->mm && !in_syscall(task_pt_regs(dst)));
> > > + sve_to_fpsimd(dst);
> > > + }
> > > +
> > > + dst->thread.sve_state = NULL;
> > > +}
> > 
> > I first thought the thread flags are not visible in dst yet since
> > dup_task_struct() calls arch_dup_task_struct() before
> > setup_thread_stack(). However, at the end of last year we enabled
> > CONFIG_THREAD_INFO_IN_TASK_STRUCT. But I don't particularly like relying
> > on this.
> > 
> > Anyway, IIUC we don't need sve_to_fpsimd() here. The
> > arch_dup_task_struct() already called fpsimd_preserve_current_state()
> > for src, so the FPSIMD state (which we care about) is transferred during
> > the *dst = *src assignment. So you'd only need the last statement,
> > possibly with a different function name like fpsimd_erase_sve (and maybe
> > make the function static inline in the header).
> 
> Regarding the intended meanings of the thread flags, does this help?
> 
> --8<--
> 
> TIF_FOREIGN_FPSTATE's meaning is expanded to cover SVE, but otherwise
> unchanged:
> 
>  * If a task is running and !TIF_FOREIGN_FPSTATE, then the CPU
>registers of the CPU the task is running on contain the authoritative
>FPSIMD/SVE state of the task.  The backing memory may be stale.
> 
>  * Otherwise (i.e., task not running, or task running and
>TIF_FOREIGN_FPSTATE), the task's FPSIMD/SVE backing memory is
>authoritative.  If additionally per_cpu(fpsimd_last_state,
>task->fpsimd_state.cpu) == &task->fpsimd_state.cpu, then
>task->fpsimd_state.cpu's registers are also up to date for task, but
>not authoritative: the current FPSIMD/SVE state may be read from
>them, but they must not be written.
>  
> The FPSIMD/SVE backing memory is selected by TIF_SVE:
> 
>  * TIF_SVE set: Zn (incorporating Vn in bits[127:0]), Pn and FFR are
>stored in task->thread.sve_state, formatted appropriately for vector
>length task->thread.sve_vl.  task->thread.sve_state must point to a
>valid buffer at least sve_state_size(task) bytes in size.
> 
>  * TIF_SVE clear: Vn are stored in task->fpsimd_state; Zn[max : 128] are
>logically zero[*] but not stored anywhere; Pn, FFR are not stored and
>have unspecified values from userspace's point of view.
>task->thread.sve_state does not need to be non-null, valid or any
>particular size: it must not be dereferenced.
> 
>In practice I don't exploit the "unspecifiedness" much.  The Zn high
>bits, Pn and FFR are all zeroed when setting TIF_SVE again:
>sve_alloc() is the common path for this.
> 
>  * FPSR and FPCR are always stored in task->fpsimd_state irrespective of
>whether TIF_SVE is clear or set, since these are not vector length
>dependent.

This looks fine. I think we need to make sure (with a warning) that
task_fpsimd_save() (and probably load) is always called in a
non-preemptible context. 
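
E.g. something like this at the top of each (just a sketch; the exact
check is debatable):

	WARN_ON(preemptible());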

> [*] theoretically unspecified, which is what I've written in sve.txt.
> However, on any FPSIMD Vn write the upper bits of the corresponding Zn
> must become logically zero in order to conform to the SVE programmer's
> model.  It's not feasible to track which Vn have been written
> individually because that would involve trapping every FPSIMD insn until
> all possible Vn have been written.  So the kernel ensures that the Zn
> high bits become zero.
> 
> Maybe we should just guarantee "zero-or-preserved" behaviour for
> userspace.  This may close down some future optimisation opportunities,
> but maybe it's better to be simple.

Does it work if we leave it as "unspecified" in the document but just do
zero-or-preserved in the kernel code?

Just wondering, as an optimisation for do_sve_acc() - instead of
sve_alloc() and fpsimd_to_sve(), can we not force the loading of the
FPSIMD regs on the return to user via TIF_FOREIGN_FPSTATE? This would
ensure the zeroing of the top SVE bits and we only need to allocate the
SVE state on the saving path. This means enabling SVE for user and
setting TIF_SVE without having the backing storage allocated.
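
Roughly, I mean something like this in the SVE access trap handler
(sketch, untested; the load path would then have to cope with TIF_SVE
set while sve_state is still NULL):

	/* no sve_alloc()/fpsimd_to_sve() here */
	set_thread_flag(TIF_SVE);
	set_thread_flag(TIF_FOREIGN_FPSTATE);	/* reload the regs on return to
						 * user, zeroing the Zn high bits */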

-- 
Catalin
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu

Re: [PATCH v2 10/28] arm64/sve: Low-level CPU setup

2017-10-05 Thread Dave Martin
On Thu, Oct 05, 2017 at 12:04:12PM +0100, Suzuki K Poulose wrote:
> On 05/10/17 11:47, Dave Martin wrote:

[...]

> >>>On Thu, Aug 31, 2017 at 06:00:42PM +0100, Dave P Martin wrote:
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 877d42f..dd22ef2 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -27,6 +27,7 @@
>   #include 
>   #include 
>   #include 
> +#include 
>   #ifdef CONFIG_ARM64_64K_PAGES
>   #define TCR_TG_FLAGS    TCR_TG0_64K | TCR_TG1_64K
> @@ -186,8 +187,17 @@ ENTRY(__cpu_setup)
>   tlbi    vmalle1 // Invalidate local TLB
>   dsb nsh
> - mov x0, #3 << 20
> - msr cpacr_el1, x0   // Enable FP/ASIMD
> + mov x0, #3 << 20// FEN
> +
> + /* SVE */
> + mrs x5, id_aa64pfr0_el1
> + ubfx    x5, x5, #ID_AA64PFR0_SVE_SHIFT, #4
> + cbz x5, 1f
> +
> + bic x0, x0, #CPACR_EL1_ZEN
> + orr x0, x0, #CPACR_EL1_ZEN_EL1EN// SVE: trap for EL0, not EL1
> +1:   msr cpacr_el1, x0   // Enable FP/ASIMD

[..]

> >I can add a helper el1_enable_sve() say, and call it before probing
> >ZCR_EL1 in the cpufeatures code.
> >
> >This makes enabling SVE a side-effect of the cpufeatures code, which
> >I'm not that comfortable with -- it feels like something that later
> >refactoring could easily break.
> >
> >I can also add an explicit call to el1_enable_sve() in sve_setup(),
> >but this only works on the boot cpu.
> >
> >For secondaries, I could add something in secondary_start_kernel(),
> >but this looks incongruous since there's no other call to do something
> >similar yet.
> >
> >
> >** Suzuki, do we have any other case where the trap for a CPU feature is
> >turned off by the cpufeatures code?  If there's already a template for
> >this then I'm happy to follow it.
> 
> The closest thing you have is an "enable" callback for each "available"
> capability, which gets invoked on all CPUs (boot time active CPUs and the
> ones which are brought up later). This is used by things like, PAN to
> do some CPU specific setups.
> 
> See  :
>enable_cpu_capabilities()  // For all boot time active CPUs
>and
>verify_local_cpu_features()  // For CPUs brought up later

That may allow me to do something tidier, provided the enable method is
called early enough.

I'll take a look.
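
For concreteness, I imagine the entry would end up looking roughly like
this (sketch only; the field values and the el1_enable_sve() name are
guesses rather than code from this series):

	{
		.desc = "Scalable Vector Extension",
		.capability = ARM64_SVE,
		.def_scope = SCOPE_SYSTEM,
		.sys_reg = SYS_ID_AA64PFR0_EL1,
		.sign = FTR_UNSIGNED,
		.field_pos = ID_AA64PFR0_SVE_SHIFT,
		.min_field_value = 1,
		.matches = has_cpuid_feature,
		.enable = el1_enable_sve,	/* runs on each CPU; could clear the
						 * CPACR_EL1 trap from here */
	},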

Cheers
---Dave
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 10/28] arm64/sve: Low-level CPU setup

2017-10-05 Thread Suzuki K Poulose

On 05/10/17 11:47, Dave Martin wrote:

On Wed, Sep 13, 2017 at 08:21:11PM +0100, Dave Martin wrote:

On Wed, Sep 13, 2017 at 06:32:06AM -0700, Catalin Marinas wrote:

On Thu, Aug 31, 2017 at 06:00:42PM +0100, Dave P Martin wrote:

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 877d42f..dd22ef2 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -27,6 +27,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #ifdef CONFIG_ARM64_64K_PAGES

  #define TCR_TG_FLAGS  TCR_TG0_64K | TCR_TG1_64K
@@ -186,8 +187,17 @@ ENTRY(__cpu_setup)
	tlbi    vmalle1 // Invalidate local TLB
dsb nsh
  
-	mov	x0, #3 << 20

-   msr cpacr_el1, x0   // Enable FP/ASIMD
+   mov x0, #3 << 20  // FEN
+
+   /* SVE */
+   mrs x5, id_aa64pfr0_el1
+   ubfx    x5, x5, #ID_AA64PFR0_SVE_SHIFT, #4
+   cbz x5, 1f
+
+   bic x0, x0, #CPACR_EL1_ZEN
+   orr x0, x0, #CPACR_EL1_ZEN_EL1EN// SVE: trap for EL0, not EL1
+1: msr cpacr_el1, x0   // Enable FP/ASIMD


For EL1, I wonder whether we could move this later to cpufeature.c. IIRC
I tried to do the same with FPSIMD but hit an issue with EFI run-time
services (I may be wrong though).


I'll take a look at this -- I believe it should be safe to disable this
trap for EL1 relatively late.  This is needed before probing for
available vector lengths, but apart from that the kernel shouldn't touch
SVE until/unless some user task uses SVE.

This would change if we eventually enable kernel-mode SVE, but I wouldn't
expect that to get used in early boot before the cpufeatures code runs.

Ard may have a view on this.


I've had a go at this, but there's a lot of splatter.

I can add a helper el1_enable_sve() say, and call it before probing
ZCR_EL1 in the cpufeatures code.

This makes enabling SVE a side-effect of the cpufeatures code, which
I'm not that comfortable with -- it feels like something that later
refactoring could easily break.

I can also add an explicit call to el1_enable_sve() in sve_setup(),
but this only works on the boot cpu.

For secondaries, I could add something in secondary_start_kernel(),
but this looks incongruous since there's no other call to do something
similar yet.


** Suzuki, do we have any other case where the trap for a CPU feature is
turned off by the cpufeatures code?  If there's already a template for
this then I'm happy to follow it.


The closest thing you have is an "enable" callback for each "available"
capability, which gets invoked on all CPUs (boot time active CPUs and the
ones which are brought up later). This is used by things like, PAN to
do some CPU specific setups.

See  :
   enable_cpu_capabilities()  // For all boot time active CPUs
   and
   verify_local_cpu_features()  // For CPUs brought up later

Cheers
Suzuki
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 10/28] arm64/sve: Low-level CPU setup

2017-10-05 Thread Dave Martin
On Wed, Sep 13, 2017 at 08:21:11PM +0100, Dave Martin wrote:
> On Wed, Sep 13, 2017 at 06:32:06AM -0700, Catalin Marinas wrote:
> > On Thu, Aug 31, 2017 at 06:00:42PM +0100, Dave P Martin wrote:
> > > diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> > > index 877d42f..dd22ef2 100644
> > > --- a/arch/arm64/mm/proc.S
> > > +++ b/arch/arm64/mm/proc.S
> > > @@ -27,6 +27,7 @@
> > >  #include 
> > >  #include 
> > >  #include 
> > > +#include 
> > >  
> > >  #ifdef CONFIG_ARM64_64K_PAGES
> > >  #define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K
> > > @@ -186,8 +187,17 @@ ENTRY(__cpu_setup)
> > >   tlbi    vmalle1 // Invalidate local TLB
> > >   dsb nsh
> > >  
> > > - mov x0, #3 << 20
> > > - msr cpacr_el1, x0   // Enable FP/ASIMD
> > > + mov x0, #3 << 20// FEN
> > > +
> > > + /* SVE */
> > > + mrs x5, id_aa64pfr0_el1
> > > + ubfx    x5, x5, #ID_AA64PFR0_SVE_SHIFT, #4
> > > + cbz x5, 1f
> > > +
> > > + bic x0, x0, #CPACR_EL1_ZEN
> > > + orr x0, x0, #CPACR_EL1_ZEN_EL1EN// SVE: trap for EL0, not EL1
> > > +1:   msr cpacr_el1, x0   // Enable FP/ASIMD
> > 
> > For EL1, I wonder whether we could move this later to cpufeature.c. IIRC
> > I tried to do the same with FPSIMD but hit an issue with EFI run-time
> > services (I may be wrong though).
> 
> I'll take a look at this -- I believe it should be safe to disable this
> trap for EL1 relatively late.  This is needed before probing for
> available vector lengths, but apart from that the kernel shouldn't touch
> SVE until/unless some user task uses SVE.
> 
> This would change if we eventually enable kernel-mode SVE, but I wouldn't
> expect that to get used in early boot before the cpufeatures code runs.
> 
> Ard may have a view on this.

I've had a go at this, but there's a lot of splatter.

I can add a helper el1_enable_sve() say, and call it before probing
ZCR_EL1 in the cpufeatures code.

This makes enabling SVE a side-effect of the cpufeatures code, which
I'm not that comfortable with -- it feels like something that later
refactoring could easily break.

I can also add an explicit call to el1_enable_sve() in sve_setup(),
but this only works on the boot cpu.

For secondaries, I could add something in secondary_start_kernel(),
but this looks incongruous since there's no other call to do something
similar yet.


** Suzuki, do we have any other case where the trap for a CPU feature is
turned off by the cpufeatures code?  If there's already a template for
this then I'm happy to follow it.

Otherwise, maybe it's simpler to keep it in __cpu_setup after all
since that's a common path that all CPUs pass through.

Cheers
---Dave
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 5/5] KVM: arm/arm64: kvm_arch_vcpu_runnable: don't miss injected irqs

2017-10-05 Thread Marc Zyngier
On 29/09/17 12:30, Andrew Jones wrote:
> When the vPMU is in use, if a VCPU's perf event overflow handler
> were to fire after the VCPU started waiting, then the wake up
> done by the kvm_vcpu_kick() call in the handler would do nothing,
> as no "pmu overflow" state is checked in kvm_arch_vcpu_runnable().
> Fix this by checking the IRQ_PENDING VCPU request in runnable().
> Checking the request also sufficiently covers all the cases that
> kvm_vgic_vcpu_pending_irq() covers, so we can just replace that.
> 
> Signed-off-by: Andrew Jones 
> ---
>  virt/kvm/arm/arm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index 5bc9b0d2fd0f..725527f491e4 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -423,7 +423,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
>   return !vcpu_should_sleep(vcpu) &&
>  (vcpu->arch.mp_state != KVM_MP_STATE_HALTED ||
>   (!!vcpu->arch.irq_lines ||
> -  kvm_vgic_vcpu_pending_irq(vcpu)));
> +  kvm_test_request(KVM_REQ_IRQ_PENDING, vcpu)));
>  }
>  
>  bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
> 

Reviewed-by: Marc Zyngier 

On a side note, I just had a look at our usage of KVM_REQ_IRQ_PENDING,
and we always seem to have a make_request/kick pair (which definitely
makes sense). Maybe there is room for a bit of consolidation there too.
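
E.g. a trivial wrapper (hypothetical name) would cover the current users:

	static inline void kvm_vcpu_set_irq_pending(struct kvm_vcpu *vcpu)
	{
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
		kvm_vcpu_kick(vcpu);
	}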

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 6/5] KVM: arm/arm64: make kvm_vgic_vcpu_pending_irq static

2017-10-05 Thread Marc Zyngier
On 02/10/17 09:31, Andrew Jones wrote:
> Now that the only caller of kvm_vgic_vcpu_pending_irq() is in the
> vgic code, let's make it private in order to discourage it's use
> outside, as checking the IRQ_PENDING VCPU request is likely the
> right thing for non-vgic code to do. Also, remove a vgic prototype
> that was mistakenly introduced with 0919e84c0fc1, and along for the
> ride ever since.
> 
> Signed-off-by: Andrew Jones 
> ---
>  include/kvm/arm_vgic.h   | 3 ---
>  virt/kvm/arm/vgic/vgic.c | 2 +-
>  2 files changed, 1 insertion(+), 4 deletions(-)
> 
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 34dba516ef24..aade132d2a30 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -311,8 +311,6 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 
> virt_irq, u32 phys_irq);
>  int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq);
>  bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq);
>  
> -int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
> -
>  void kvm_vgic_load(struct kvm_vcpu *vcpu);
>  void kvm_vgic_put(struct kvm_vcpu *vcpu);
>  
> @@ -322,7 +320,6 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu);
>  #define vgic_valid_spi(k, i) (((i) >= VGIC_NR_PRIVATE_IRQS) && \
>   ((i) < (k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS))
>  
> -bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu);
>  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
>  void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
>  
> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> index fed717e07938..72a16259beec 100644
> --- a/virt/kvm/arm/vgic/vgic.c
> +++ b/virt/kvm/arm/vgic/vgic.c
> @@ -730,7 +730,7 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu)
>   vgic_v3_put(vcpu);
>  }
>  
> -int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
> +static int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
>  {
>   struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>   struct vgic_irq *irq;
> 

Acked-by: Marc Zyngier 

M.
-- 
Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 4/5] KVM: arm/arm64: improve kvm_arch_vcpu_runnable

2017-10-05 Thread Marc Zyngier
On 29/09/17 12:30, Andrew Jones wrote:
> Conceptually, kvm_arch_vcpu_runnable() should be "not waiting,
> or waiting for interrupts and interrupts are pending",
> 
>   !waiting-uninterruptable &&
>   (!waiting-for-interrupts || interrupts-pending)
> 
> but the implementation was only
> 
>   !waiting-uninterruptable && interrupts-pending
> 
> Thanks to the context of the two callers there wasn't an issue;
> however, to future-proof the function, this patch implements the
> conceptual condition by applying mp_state to track waiting-
> uninterruptable vs. waiting-for-interrupts.
> 
> Signed-off-by: Andrew Jones 
> ---
>  Documentation/virtual/kvm/api.txt | 10 ++
>  virt/kvm/arm/arm.c| 13 +
>  2 files changed, 15 insertions(+), 8 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt 
> b/Documentation/virtual/kvm/api.txt
> index e63a35fafef0..7c0bb1ae10a2 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -1071,7 +1071,7 @@ Possible values are:
>   - KVM_MP_STATE_INIT_RECEIVED:   the vcpu has received an INIT signal, and is
>   now ready for a SIPI [x86]
>   - KVM_MP_STATE_HALTED:  the vcpu has executed a HLT instruction and
> - is waiting for an interrupt [x86]
> + is waiting for an interrupt [x86,arm/arm64]
>   - KVM_MP_STATE_SIPI_RECEIVED:   the vcpu has just received a SIPI (vector
>   accessible via KVM_GET_VCPU_EVENTS) [x86]
>   - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390,arm/arm64]
> @@ -1087,8 +1087,9 @@ these architectures.
>  
>  For arm/arm64:
>  
> -The only states that are valid are KVM_MP_STATE_STOPPED and
> -KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not.
> +The only states that are valid are KVM_MP_STATE_STOPPED, KVM_MP_STATE_HALTED,
> +and KVM_MP_STATE_RUNNABLE which reflect if the vcpu is powered-off, waiting
> +for interrupts, or powered-on and not waiting for interrupts.
>  
>  4.39 KVM_SET_MP_STATE
>  
> @@ -1108,7 +1109,8 @@ these architectures.
>  For arm/arm64:
>  
>  The only states that are valid are KVM_MP_STATE_STOPPED and
> -KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be paused or not.
> +KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be
> +powered-off or not.
>  
>  4.40 KVM_SET_IDENTITY_MAP_ADDR
>  
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index 220a3dbda76c..5bc9b0d2fd0f 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -414,13 +414,16 @@ static bool vcpu_should_sleep(struct kvm_vcpu *vcpu)
>   * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
>   * @vcpu:The VCPU pointer
>   *
> - * If the guest CPU is not waiting for interrupts or an interrupt line is
> - * asserted, the CPU is by definition runnable.
> + * If the VCPU is not waiting at all (including sleeping, which is waiting
> + * uninterruptably), or it's waiting for interrupts but interrupts are
> + * pending, then it is by definition runnable.
>   */
>  int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
>  {
> - return (!!vcpu->arch.irq_lines || kvm_vgic_vcpu_pending_irq(vcpu))
> - && !vcpu_should_sleep(vcpu);
> + return !vcpu_should_sleep(vcpu) &&
> +(vcpu->arch.mp_state != KVM_MP_STATE_HALTED ||
> + (!!vcpu->arch.irq_lines ||
> +  kvm_vgic_vcpu_pending_irq(vcpu)));
>  }
>  
>  bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
> @@ -582,7 +585,9 @@ void kvm_arm_emulate_wfe(struct kvm_vcpu *vcpu)
>  void kvm_arm_emulate_wfi(struct kvm_vcpu *vcpu)
>  {
>   vcpu->stat.wfi_exit_stat++;
> + vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
>   kvm_vcpu_block(vcpu);
> + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
>   kvm_clear_request(KVM_REQ_UNHALT, vcpu);
>  }
>  
> 

Reviewed-by: Marc Zyngier 

M.
-- 
Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 3/5] KVM: arm/arm64: factor out common wfe/wfi emulation code

2017-10-05 Thread Marc Zyngier
On 29/09/17 12:30, Andrew Jones wrote:
> Before we add more common code to the wfi emulation, let's first
> factor out everything common.
> 
> Signed-off-by: Andrew Jones 
> ---
>  arch/arm/include/asm/kvm_host.h   |  2 ++
>  arch/arm/kvm/handle_exit.c| 14 +-
>  arch/arm64/include/asm/kvm_host.h |  2 ++
>  arch/arm64/kvm/handle_exit.c  | 14 +-
>  virt/kvm/arm/arm.c| 13 +
>  virt/kvm/arm/psci.c   | 15 +++
>  6 files changed, 34 insertions(+), 26 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 85f3c20b9759..964320825372 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -231,6 +231,8 @@ void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu);
>  void kvm_arm_vcpu_power_on(struct kvm_vcpu *vcpu);
>  void kvm_arm_halt_guest(struct kvm *kvm);
>  void kvm_arm_resume_guest(struct kvm *kvm);
> +void kvm_arm_emulate_wfe(struct kvm_vcpu *vcpu);
> +void kvm_arm_emulate_wfi(struct kvm_vcpu *vcpu);
>  
>  int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
>  unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
> diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
> index cf8bf6bf87c4..e40466577c87 100644
> --- a/arch/arm/kvm/handle_exit.c
> +++ b/arch/arm/kvm/handle_exit.c
> @@ -57,22 +57,18 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct 
> kvm_run *run)
>   * @run: the kvm_run structure pointer
>   *
>   * WFE: Yield the CPU and come back to this vcpu when the scheduler
> - * decides to.
> - * WFI: Simply call kvm_vcpu_block(), which will halt execution of
> - * world-switches and schedule other host processes until there is an
> - * incoming IRQ or FIQ to the VM.
> + *  decides to.
> + * WFI: Halt execution of world-switches and schedule other host
> + *  processes until there is an incoming IRQ or FIQ to the VM.
>   */
>  static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  {
>   if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) {
>   trace_kvm_wfx(*vcpu_pc(vcpu), true);
> - vcpu->stat.wfe_exit_stat++;
> - kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
> + kvm_arm_emulate_wfe(vcpu);
>   } else {
>   trace_kvm_wfx(*vcpu_pc(vcpu), false);
> - vcpu->stat.wfi_exit_stat++;
> - kvm_vcpu_block(vcpu);
> - kvm_clear_request(KVM_REQ_UNHALT, vcpu);
> + kvm_arm_emulate_wfi(vcpu);
>   }
>  
>   kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 25545a87de11..a774f6b30474 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -332,6 +332,8 @@ void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu);
>  void kvm_arm_vcpu_power_on(struct kvm_vcpu *vcpu);
>  void kvm_arm_halt_guest(struct kvm *kvm);
>  void kvm_arm_resume_guest(struct kvm *kvm);
> +void kvm_arm_emulate_wfe(struct kvm_vcpu *vcpu);
> +void kvm_arm_emulate_wfi(struct kvm_vcpu *vcpu);
>  
>  u64 __kvm_call_hyp(void *hypfn, ...);
>  #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__)
> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
> index 7debb74843a0..7ba50a296d10 100644
> --- a/arch/arm64/kvm/handle_exit.c
> +++ b/arch/arm64/kvm/handle_exit.c
> @@ -74,22 +74,18 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu, struct 
> kvm_run *run)
>   * @vcpu:the vcpu pointer
>   *
>   * WFE: Yield the CPU and come back to this vcpu when the scheduler
> - * decides to.
> - * WFI: Simply call kvm_vcpu_block(), which will halt execution of
> - * world-switches and schedule other host processes until there is an
> - * incoming IRQ or FIQ to the VM.
> + *  decides to.
> + * WFI: Halt execution of world-switches and schedule other host
> + *  processes until there is an incoming IRQ or FIQ to the VM.
>   */
>  static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  {
>   if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
>   trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
> - vcpu->stat.wfe_exit_stat++;
> - kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
> + kvm_arm_emulate_wfe(vcpu);
>   } else {
>   trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
> - vcpu->stat.wfi_exit_stat++;
> - kvm_vcpu_block(vcpu);
> - kvm_clear_request(KVM_REQ_UNHALT, vcpu);
> + kvm_arm_emulate_wfi(vcpu);
>   }
>  
>   kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index 954e77608d29..220a3dbda76c 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -573,6 +573,19 @@ void kvm_arm_resume_guest(struct kvm 

Re: [PATCH 2/5] KVM: arm/arm64: replace power_off with mp_state=STOPPED

2017-10-05 Thread Marc Zyngier
On 29/09/17 12:30, Andrew Jones wrote:
> This prepares for more MP states to be used.
> 
> Signed-off-by: Andrew Jones 
> ---
>  arch/arm/include/asm/kvm_host.h   |  6 --
>  arch/arm64/include/asm/kvm_host.h |  6 --
>  virt/kvm/arm/arm.c| 26 ++
>  virt/kvm/arm/psci.c   | 17 +
>  4 files changed, 27 insertions(+), 28 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 4a879f6ff13b..85f3c20b9759 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -171,8 +171,8 @@ struct kvm_vcpu_arch {
>* here.
>*/
>  
> - /* vcpu power-off state */
> - bool power_off;
> + /* Current VCPU MP state */
> + u32 mp_state;

What is the rationale for not using a struct kvm_mp_state directly here?
It is the same thing (a u32), but I certainly find it more explicit than
a bare u32.
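
i.e. something like (sketch):

	/* Current VCPU MP state */
	struct kvm_mp_state mp_state;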

>  
>/* Don't run the guest (internal implementation need) */
>   bool pause;
> @@ -227,6 +227,8 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
>  
>  struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
>  struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
> +void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu);
> +void kvm_arm_vcpu_power_on(struct kvm_vcpu *vcpu);
>  void kvm_arm_halt_guest(struct kvm *kvm);
>  void kvm_arm_resume_guest(struct kvm *kvm);
>  
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index e923b58606e2..25545a87de11 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -255,8 +255,8 @@ struct kvm_vcpu_arch {
>   u32 mdscr_el1;
>   } guest_debug_preserved;
>  
> - /* vcpu power-off state */
> - bool power_off;
> + /* Current VCPU MP state */
> + u32 mp_state;
>  
>   /* Don't run the guest (internal implementation need) */
>   bool pause;
> @@ -328,6 +328,8 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
>  
>  struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
>  struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
> +void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu);
> +void kvm_arm_vcpu_power_on(struct kvm_vcpu *vcpu);
>  void kvm_arm_halt_guest(struct kvm *kvm);
>  void kvm_arm_resume_guest(struct kvm *kvm);
>  
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index e4bec508ee5b..954e77608d29 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -368,21 +368,23 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>   kvm_timer_vcpu_put(vcpu);
>  }
>  
> -static void vcpu_power_off(struct kvm_vcpu *vcpu)
> +void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
>  {
> - vcpu->arch.power_off = true;
> + vcpu->arch.mp_state = KVM_MP_STATE_STOPPED;
>   kvm_make_request(KVM_REQ_SLEEP, vcpu);
>   kvm_vcpu_kick(vcpu);
>  }
>  
> +void kvm_arm_vcpu_power_on(struct kvm_vcpu *vcpu)
> +{
> + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> + kvm_vcpu_kick(vcpu);
> +}
> +
>  int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
>   struct kvm_mp_state *mp_state)
>  {
> - if (vcpu->arch.power_off)
> - mp_state->mp_state = KVM_MP_STATE_STOPPED;
> - else
> - mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
> -
> + mp_state->mp_state = vcpu->arch.mp_state;
>   return 0;
>  }
>  
> @@ -391,10 +393,10 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu 
> *vcpu,
>  {
>   switch (mp_state->mp_state) {
>   case KVM_MP_STATE_RUNNABLE:
> - vcpu->arch.power_off = false;
> + kvm_arm_vcpu_power_on(vcpu);
>   break;
>   case KVM_MP_STATE_STOPPED:
> - vcpu_power_off(vcpu);
> + kvm_arm_vcpu_power_off(vcpu);
>   break;
>   default:
>   return -EINVAL;
> @@ -405,7 +407,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
>  
>  static bool vcpu_should_sleep(struct kvm_vcpu *vcpu)
>  {
> - return vcpu->arch.power_off || vcpu->arch.pause;
> + return vcpu->arch.mp_state == KVM_MP_STATE_STOPPED || vcpu->arch.pause;
>  }
>  
>  /**
> @@ -919,9 +921,9 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu 
> *vcpu,
>* Handle the "start in power-off" case.
>*/
>   if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
> - vcpu_power_off(vcpu);
> + kvm_arm_vcpu_power_off(vcpu);
>   else
> - vcpu->arch.power_off = false;
> + kvm_arm_vcpu_power_on(vcpu);
>  
>   return 0;
>  }
> diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
> index f1e363bab5e8..a7bf152f1247 100644
> --- a/virt/kvm/arm/psci.c
> +++ b/virt/kvm/arm/psci.c
> @@ -64,16 +64,13 @@ static unsigned long kvm_psci_vcpu_suspend(struct 
> kvm_vcpu *vcpu)
>  
>  static void kvm_psci_vcpu_off(struct kvm_vcpu 

Re: [PATCH 1/5] KVM: arm/arm64: tidy 'should sleep' conditions

2017-10-05 Thread Marc Zyngier
On 29/09/17 12:30, Andrew Jones wrote:
> Introduce vcpu_should_sleep() in order to tidy up the several places
> where the compound 'power_off || pause' condition is used.
> 
> Signed-off-by: Andrew Jones 
> ---
>  virt/kvm/arm/arm.c | 18 +++---
>  1 file changed, 11 insertions(+), 7 deletions(-)
> 
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index b9f68e4add71..e4bec508ee5b 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -403,17 +403,22 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu 
> *vcpu,
>   return 0;
>  }
>  
> +static bool vcpu_should_sleep(struct kvm_vcpu *vcpu)
> +{
> + return vcpu->arch.power_off || vcpu->arch.pause;
> +}
> +
>  /**
>   * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
> - * @v:   The VCPU pointer
> + * @vcpu:The VCPU pointer
>   *
>   * If the guest CPU is not waiting for interrupts or an interrupt line is
>   * asserted, the CPU is by definition runnable.
>   */
> -int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
> +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
>  {
> - return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v))
> - && !v->arch.power_off && !v->arch.pause);
> + return (!!vcpu->arch.irq_lines || kvm_vgic_vcpu_pending_irq(vcpu))
> + && !vcpu_should_sleep(vcpu);
>  }
>  
>  bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
> @@ -570,10 +575,9 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
>  {
>   struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
>  
> - swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
> -(!vcpu->arch.pause)));
> + swait_event_interruptible(*wq, !vcpu_should_sleep(vcpu));
>  
> - if (vcpu->arch.power_off || vcpu->arch.pause) {
> + if (vcpu_should_sleep(vcpu)) {
>   /* Awaken to handle a signal, request we sleep again later. */
>   kvm_make_request(KVM_REQ_SLEEP, vcpu);
>   }
> 

Acked-by: Marc Zyngier 

M.
-- 
Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm