[PATCH v3 22/34] KVM: SVM: Add support for CR4 write traps for an SEV-ES guest

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

For SEV-ES guests, the interception of control register write access
is not recommended. Control register interception occurs prior to the
control register being modified and the hypervisor is unable to modify
the control register itself because the register is located in the
encrypted register state.

SEV-ES support introduces new control register write traps. These traps
provide intercept support of a control register write after the control
register has been modified. The new control register value is provided in
the VMCB EXITINFO1 field, allowing the hypervisor to track the setting
of the guest control registers.

Add support to track the value of the guest CR4 register using the control
register write trap so that the hypervisor understands the guest operating
mode.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/include/uapi/asm/svm.h |  1 +
 arch/x86/kvm/svm/svm.c  |  6 ++
 arch/x86/kvm/x86.c  | 32 
 4 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 068853bcbc74..bd7169de7bcb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1476,6 +1476,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 int __kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0);
 int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
+int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4);
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
 int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 14b0d97b50e2..c4152689ea93 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -203,6 +203,7 @@
{ SVM_EXIT_XSETBV,  "xsetbv" }, \
{ SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \
{ SVM_EXIT_CR0_WRITE_TRAP,  "write_cr0_trap" }, \
+   { SVM_EXIT_CR4_WRITE_TRAP,  "write_cr4_trap" }, \
{ SVM_EXIT_INVPCID, "invpcid" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7a5adc2326fe..20cf629a0fdb 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2481,6 +2481,11 @@ static int cr_trap(struct vcpu_svm *svm)
 
ret = __kvm_set_cr0(&svm->vcpu, old_value, new_value);
break;
+   case 4:
+   old_value = kvm_read_cr4(&svm->vcpu);
+
+   ret = __kvm_set_cr4(&svm->vcpu, old_value, new_value);
+   break;
default:
WARN(1, "unhandled CR%d write trap", cr);
ret = 1;
@@ -3071,6 +3076,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_RDPRU]= rdpru_interception,
[SVM_EXIT_EFER_WRITE_TRAP]  = efer_trap,
[SVM_EXIT_CR0_WRITE_TRAP]   = cr_trap,
+   [SVM_EXIT_CR4_WRITE_TRAP]   = cr_trap,
[SVM_EXIT_INVPCID]  = invpcid_interception,
[SVM_EXIT_NPF]  = npf_interception,
[SVM_EXIT_RSM]  = rsm_interception,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bc9beb1c4c8c..b42bc0418f98 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -983,12 +983,30 @@ int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 }
 EXPORT_SYMBOL_GPL(kvm_valid_cr4);
 
+int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4)
+{
+   unsigned long mmu_role_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
+ X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
+
+   if (kvm_x86_ops.set_cr4(vcpu, cr4))
+   return 1;
+
+   if (((cr4 ^ old_cr4) & mmu_role_bits) ||
+   (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
+   kvm_mmu_reset_context(vcpu);
+
+   if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
+   kvm_update_cpuid_runtime(vcpu);
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(__kvm_set_cr4);
+
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
unsigned long old_cr4 = kvm_read_cr4(vcpu);
unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
   X86_CR4_SMEP;
-   unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE;
 
if (kvm_va

[PATCH v3 26/34] KVM: SVM: Guest FPU state save/restore not needed for SEV-ES guest

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

The guest FPU state is automatically restored on VMRUN and saved on VMEXIT
by the hardware, so there is no reason to do this in KVM. Eliminate the
allocation of the guest_fpu save area and key off its absence to skip operations
related to the guest FPU state.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/svm/svm.c  |  8 +
 arch/x86/kvm/x86.c  | 56 +++--
 3 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 51343c7e69fb..3ef63ab71701 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1473,6 +1473,8 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
int reason, bool has_error_code, u32 error_code);
 
+void kvm_free_guest_fpu(struct kvm_vcpu *vcpu);
+
 int __kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0);
 int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b8167a889d8d..ecec3d872922 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1317,6 +1317,14 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!vmsa_page)
goto error_free_vmcb_page;
+
+   /*
+* SEV-ES guests maintain an encrypted version of their FPU
+* state which is restored and saved on VMRUN and VMEXIT.
+* Free the fpu structure to prevent KVM from attempting to
+* access the FPU state.
+*/
+   kvm_free_guest_fpu(vcpu);
}
 
err = avic_init_vcpu(svm);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index aecd931f46be..c0a33d5cdc00 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4494,6 +4494,9 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 struct kvm_xsave *guest_xsave)
 {
+   if (!vcpu->arch.guest_fpu)
+   return;
+
if (boot_cpu_has(X86_FEATURE_XSAVE)) {
memset(guest_xsave, 0, sizeof(struct kvm_xsave));
fill_xsave((u8 *) guest_xsave->region, vcpu);
@@ -4511,9 +4514,14 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
 {
-   u64 xstate_bv =
-   *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
-   u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
+   u64 xstate_bv;
+   u32 mxcsr;
+
+   if (!vcpu->arch.guest_fpu)
+   return 0;
+
+   xstate_bv = *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
+   mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
 
if (boot_cpu_has(X86_FEATURE_XSAVE)) {
/*
@@ -9238,9 +9246,14 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
kvm_save_current_fpu(vcpu->arch.user_fpu);
 
-   /* PKRU is separately restored in kvm_x86_ops.run.  */
-   __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
-   ~XFEATURE_MASK_PKRU);
+   /*
+* Guests with protected state can't have it set by the hypervisor,
+* so skip trying to set it.
+*/
+   if (vcpu->arch.guest_fpu)
+   /* PKRU is separately restored in kvm_x86_ops.run. */
+   __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
+   ~XFEATURE_MASK_PKRU);
 
fpregs_mark_activate();
fpregs_unlock();
@@ -9253,7 +9266,12 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
fpregs_lock();
 
-   kvm_save_current_fpu(vcpu->arch.guest_fpu);
+   /*
+* Guests with protected state can't have it read by the hypervisor,
+* so skip trying to save it.
+*/
+   if (vcpu->arch.guest_fpu)
+   kvm_save_current_fpu(vcpu->arch.guest_fpu);
 
copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
 
@@ -9769,6 +9787,9 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
struct fxregs_state *fxsave;
 
+   if (!vcpu->arch.guest_fpu)
+   return 0;
+
vcpu_load(vcpu);
 
fxsave = &vcpu->arch.guest_fpu->state.fxsave;
@@ -9789,6 +9810,9 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu

[PATCH v3 23/34] KVM: SVM: Add support for CR8 write traps for an SEV-ES guest

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

For SEV-ES guests, the interception of control register write access
is not recommended. Control register interception occurs prior to the
control register being modified and the hypervisor is unable to modify
the control register itself because the register is located in the
encrypted register state.

SEV-ES support introduces new control register write traps. These traps
provide intercept support of a control register write after the control
register has been modified. The new control register value is provided in
the VMCB EXITINFO1 field, allowing the hypervisor to track the setting
of the guest control registers.

Add support to track the value of the guest CR8 register using the control
register write trap so that the hypervisor understands the guest operating
mode.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/uapi/asm/svm.h | 1 +
 arch/x86/kvm/svm/svm.c  | 4 
 2 files changed, 5 insertions(+)

diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index c4152689ea93..554f75fe013c 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -204,6 +204,7 @@
{ SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \
{ SVM_EXIT_CR0_WRITE_TRAP,  "write_cr0_trap" }, \
{ SVM_EXIT_CR4_WRITE_TRAP,  "write_cr4_trap" }, \
+   { SVM_EXIT_CR8_WRITE_TRAP,  "write_cr8_trap" }, \
{ SVM_EXIT_INVPCID, "invpcid" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 20cf629a0fdb..fb529d5cb36d 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2486,6 +2486,9 @@ static int cr_trap(struct vcpu_svm *svm)
 
ret = __kvm_set_cr4(&svm->vcpu, old_value, new_value);
break;
+   case 8:
+   ret = kvm_set_cr8(&svm->vcpu, new_value);
+   break;
default:
WARN(1, "unhandled CR%d write trap", cr);
ret = 1;
@@ -3077,6 +3080,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_EFER_WRITE_TRAP]  = efer_trap,
[SVM_EXIT_CR0_WRITE_TRAP]   = cr_trap,
[SVM_EXIT_CR4_WRITE_TRAP]   = cr_trap,
+   [SVM_EXIT_CR8_WRITE_TRAP]   = cr_trap,
[SVM_EXIT_INVPCID]  = invpcid_interception,
[SVM_EXIT_NPF]  = npf_interception,
[SVM_EXIT_RSM]  = rsm_interception,
-- 
2.28.0



[PATCH v3 25/34] KVM: SVM: Do not report support for SMM for an SEV-ES guest

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

SEV-ES guests do not currently support SMM. Update the has_emulated_msr()
kvm_x86_ops function to take a struct kvm parameter so that the capability
can be reported at a VM level.

Since this op is also called during KVM initialization, before a struct
kvm instance is available, add a comment to each implementation of
has_emulated_msr() to indicate that the kvm parameter can be NULL.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/svm/svm.c  | 11 ++-
 arch/x86/kvm/vmx/vmx.c  |  6 +-
 arch/x86/kvm/x86.c  |  4 ++--
 4 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bd7169de7bcb..51343c7e69fb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1091,7 +1091,7 @@ struct kvm_x86_ops {
void (*hardware_disable)(void);
void (*hardware_unsetup)(void);
bool (*cpu_has_accelerated_tpr)(void);
-   bool (*has_emulated_msr)(u32 index);
+   bool (*has_emulated_msr)(struct kvm *kvm, u32 index);
void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
 
unsigned int vm_size;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index fb529d5cb36d..b8167a889d8d 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3922,12 +3922,21 @@ static bool svm_cpu_has_accelerated_tpr(void)
return false;
 }
 
-static bool svm_has_emulated_msr(u32 index)
+/*
+ * The kvm parameter can be NULL (module initialization, or invocation before
+ * VM creation). Be sure to check the kvm parameter before using it.
+ */
+static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
 {
switch (index) {
case MSR_IA32_MCG_EXT_CTL:
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
return false;
+   case MSR_IA32_SMBASE:
+   /* SEV-ES guests do not support SMM, so report false */
+   if (kvm && sev_es_guest(kvm))
+   return false;
+   break;
default:
break;
}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 47b8357b9751..006d91dca695 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6399,7 +6399,11 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
handle_exception_nmi_irqoff(vmx);
 }
 
-static bool vmx_has_emulated_msr(u32 index)
+/*
+ * The kvm parameter can be NULL (module initialization, or invocation before
+ * VM creation). Be sure to check the kvm parameter before using it.
+ */
+static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
 {
switch (index) {
case MSR_IA32_SMBASE:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 647d9e47195a..aecd931f46be 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3777,7 +3777,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 * fringe case that is not enabled except via specific settings
 * of the module parameters.
 */
-   r = kvm_x86_ops.has_emulated_msr(MSR_IA32_SMBASE);
+   r = kvm_x86_ops.has_emulated_msr(kvm, MSR_IA32_SMBASE);
break;
case KVM_CAP_VAPIC:
r = !kvm_x86_ops.cpu_has_accelerated_tpr();
@@ -5789,7 +5789,7 @@ static void kvm_init_msr_list(void)
}
 
for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
-   if (!kvm_x86_ops.has_emulated_msr(emulated_msrs_all[i]))
+   if (!kvm_x86_ops.has_emulated_msr(NULL, emulated_msrs_all[i]))
continue;
 
emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
-- 
2.28.0



[PATCH v3 24/34] KVM: x86: Update __get_sregs() / __set_sregs() to support SEV-ES

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

Since many of the registers used by an SEV-ES guest are encrypted and cannot
be read or written, adjust __get_sregs() / __set_sregs() to take into
account whether the VMSA/guest state is encrypted.

For __get_sregs(), return the actual value that is in use by the guest
for all registers being tracked using the write trap support.

For __set_sregs(), skip setting all guest register values.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/x86.c | 27 ++-
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b42bc0418f98..647d9e47195a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9432,6 +9432,9 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
struct desc_ptr dt;
 
+   if (vcpu->arch.guest_state_protected)
+   goto skip_protected_regs;
+
kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -9449,9 +9452,11 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
sregs->gdt.limit = dt.size;
sregs->gdt.base = dt.address;
 
-   sregs->cr0 = kvm_read_cr0(vcpu);
sregs->cr2 = vcpu->arch.cr2;
sregs->cr3 = kvm_read_cr3(vcpu);
+
+skip_protected_regs:
+   sregs->cr0 = kvm_read_cr0(vcpu);
sregs->cr4 = kvm_read_cr4(vcpu);
sregs->cr8 = kvm_get_cr8(vcpu);
sregs->efer = vcpu->arch.efer;
@@ -9590,6 +9595,9 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
if (kvm_set_apic_base(vcpu, &apic_base_msr))
goto out;
 
+   if (vcpu->arch.guest_state_protected)
+   goto skip_protected_regs;
+
dt.size = sregs->idt.limit;
dt.address = sregs->idt.base;
kvm_x86_ops.set_idt(vcpu, &dt);
@@ -9628,14 +9636,6 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
if (mmu_reset_needed)
kvm_mmu_reset_context(vcpu);
 
-   max_bits = KVM_NR_INTERRUPTS;
-   pending_vec = find_first_bit(
-   (const unsigned long *)sregs->interrupt_bitmap, max_bits);
-   if (pending_vec < max_bits) {
-   kvm_queue_interrupt(vcpu, pending_vec, false);
-   pr_debug("Set back pending irq %d\n", pending_vec);
-   }
-
kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -9654,6 +9654,15 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
!is_protmode(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 
+skip_protected_regs:
+   max_bits = KVM_NR_INTERRUPTS;
+   pending_vec = find_first_bit(
+   (const unsigned long *)sregs->interrupt_bitmap, max_bits);
+   if (pending_vec < max_bits) {
+   kvm_queue_interrupt(vcpu, pending_vec, false);
+   pr_debug("Set back pending irq %d\n", pending_vec);
+   }
+
kvm_make_request(KVM_REQ_EVENT, vcpu);
 
ret = 0;
-- 
2.28.0



[PATCH v3 21/34] KVM: SVM: Add support for CR0 write traps for an SEV-ES guest

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

For SEV-ES guests, the interception of control register write access
is not recommended. Control register interception occurs prior to the
control register being modified and the hypervisor is unable to modify
the control register itself because the register is located in the
encrypted register state.

SEV-ES support introduces new control register write traps. These traps
provide intercept support of a control register write after the control
register has been modified. The new control register value is provided in
the VMCB EXITINFO1 field, allowing the hypervisor to track the setting
of the guest control registers.

Add support to track the value of the guest CR0 register using the control
register write trap so that the hypervisor understands the guest operating
mode.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/include/uapi/asm/svm.h | 17 ++
 arch/x86/kvm/svm/svm.c  | 24 +++
 arch/x86/kvm/x86.c  | 41 +++--
 4 files changed, 66 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4fe718e339c9..068853bcbc74 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1473,6 +1473,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
int reason, bool has_error_code, u32 error_code);
 
+int __kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0);
 int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 6e3f92e17655..14b0d97b50e2 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -78,6 +78,22 @@
 #define SVM_EXIT_XSETBV        0x08d
 #define SVM_EXIT_RDPRU 0x08e
 #define SVM_EXIT_EFER_WRITE_TRAP   0x08f
+#define SVM_EXIT_CR0_WRITE_TRAP    0x090
+#define SVM_EXIT_CR1_WRITE_TRAP    0x091
+#define SVM_EXIT_CR2_WRITE_TRAP    0x092
+#define SVM_EXIT_CR3_WRITE_TRAP    0x093
+#define SVM_EXIT_CR4_WRITE_TRAP    0x094
+#define SVM_EXIT_CR5_WRITE_TRAP    0x095
+#define SVM_EXIT_CR6_WRITE_TRAP    0x096
+#define SVM_EXIT_CR7_WRITE_TRAP    0x097
+#define SVM_EXIT_CR8_WRITE_TRAP    0x098
+#define SVM_EXIT_CR9_WRITE_TRAP    0x099
+#define SVM_EXIT_CR10_WRITE_TRAP   0x09a
+#define SVM_EXIT_CR11_WRITE_TRAP   0x09b
+#define SVM_EXIT_CR12_WRITE_TRAP   0x09c
+#define SVM_EXIT_CR13_WRITE_TRAP   0x09d
+#define SVM_EXIT_CR14_WRITE_TRAP   0x09e
+#define SVM_EXIT_CR15_WRITE_TRAP   0x09f
 #define SVM_EXIT_INVPCID   0x0a2
 #define SVM_EXIT_NPF   0x400
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI   0x401
@@ -186,6 +202,7 @@
{ SVM_EXIT_MWAIT,   "mwait" }, \
{ SVM_EXIT_XSETBV,  "xsetbv" }, \
{ SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \
+   { SVM_EXIT_CR0_WRITE_TRAP,  "write_cr0_trap" }, \
{ SVM_EXIT_INVPCID, "invpcid" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e16c1b49b34f..7a5adc2326fe 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2466,6 +2466,29 @@ static int cr_interception(struct vcpu_svm *svm)
return kvm_complete_insn_gp(&svm->vcpu, err);
 }
 
+static int cr_trap(struct vcpu_svm *svm)
+{
+   unsigned long old_value, new_value;
+   unsigned int cr;
+   int ret;
+
+   new_value = (unsigned long)svm->vmcb->control.exit_info_1;
+
+   cr = svm->vmcb->control.exit_code - SVM_EXIT_CR0_WRITE_TRAP;
+   switch (cr) {
+   case 0:
+   old_value = kvm_read_cr0(&svm->vcpu);
+
+   ret = __kvm_set_cr0(&svm->vcpu, old_value, new_value);
+   break;
+   default:
+   WARN(1, "unhandled CR%d write trap", cr);
+   ret = 1;
+   }
+
+   return kvm_complete_insn_gp(&svm->vcpu, ret);
+}
+
 static int dr_interception(struct vcpu_svm *svm)
 {
int reg, dr;
@@ -3047,6 +3070,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_XSETBV]   = xsetbv_interception,
[SVM_EXIT_RDPRU]= rdpru_interception

[PATCH v3 19/34] KVM: SVM: Support string IO operations for an SEV-ES guest

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

For an SEV-ES guest, string-based port IO is performed to a shared
(un-encrypted) page so that both the hypervisor and guest can read or
write to it and each see the contents.

For string-based port IO operations, invoke SEV-ES specific routines that
can complete the operation using common KVM port IO support.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm/sev.c  | 18 ++--
 arch/x86/kvm/svm/svm.c  | 11 +--
 arch/x86/kvm/svm/svm.h  |  1 +
 arch/x86/kvm/x86.c  | 51 +
 arch/x86/kvm/x86.h  |  3 ++
 6 files changed, 80 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7776bb18e29d..4fe718e339c9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -614,6 +614,7 @@ struct kvm_vcpu_arch {
 
struct kvm_pio_request pio;
void *pio_data;
+   void *guest_ins_data;
 
u8 event_exit_inst_len;
 
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 9dde60014f01..75a38dbebe79 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1403,9 +1403,14 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
case SVM_EXIT_INVD:
break;
case SVM_EXIT_IOIO:
-   if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
-   if (!ghcb_rax_is_valid(ghcb))
+   if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
+   if (!ghcb_sw_scratch_is_valid(ghcb))
goto vmgexit_err;
+   } else {
+   if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
+   if (!ghcb_rax_is_valid(ghcb))
+   goto vmgexit_err;
+   }
break;
case SVM_EXIT_MSR:
if (!ghcb_rcx_is_valid(ghcb))
@@ -1772,3 +1777,12 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
 
return ret;
 }
+
+int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
+{
+   if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2))
+   return -EINVAL;
+
+   return kvm_sev_es_string_io(&svm->vcpu, size, port,
+   svm->ghcb_sa, svm->ghcb_sa_len, in);
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 02f8e83df2d3..fa15223a2106 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2054,11 +2054,16 @@ static int io_interception(struct vcpu_svm *svm)
++svm->vcpu.stat.io_exits;
string = (io_info & SVM_IOIO_STR_MASK) != 0;
in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
-   if (string)
-   return kvm_emulate_instruction(vcpu, 0);
-
port = io_info >> 16;
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
+
+   if (string) {
+   if (sev_es_guest(vcpu->kvm))
+   return sev_es_string_io(svm, size, port, in);
+   else
+   return kvm_emulate_instruction(vcpu, 0);
+   }
+
svm->next_rip = svm->vmcb->control.exit_info_2;
 
return kvm_fast_pio(&svm->vcpu, size, port, in);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f5e5b91e06d3..1c1399b9516a 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -572,5 +572,6 @@ void __init sev_hardware_setup(void);
 void sev_hardware_teardown(void);
 void sev_free_vcpu(struct kvm_vcpu *vcpu);
 int sev_handle_vmgexit(struct vcpu_svm *svm);
+int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
 
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fe9064a8139f..5f1835cca28d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10757,6 +10757,10 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
 
 unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
 {
+   /* Can't read the RIP when guest state is protected, just return 0 */
+   if (vcpu->arch.guest_state_protected)
+   return 0;
+
if (is_64_bit_mode(vcpu))
return kvm_rip_read(vcpu);
return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
@@ -11389,6 +11393,53 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
 }
 EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read);
 
+static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
+{
+   memcpy(vcpu->arch.guest_ins_data, vcpu->arch.pio_data,
+  vcpu->arch.pio.count * vcpu->arch.pio.size);
+   vcpu->arch.pio.count = 0;
+
+   return 1;
+}
+
+static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
+  unsigned int 

[PATCH v3 20/34] KVM: SVM: Add support for EFER write traps for an SEV-ES guest

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

For SEV-ES guests, the interception of EFER write access is not
recommended. EFER interception occurs prior to EFER being modified and
the hypervisor is unable to modify EFER itself because the register is
located in the encrypted register state.

SEV-ES support introduces a new EFER write trap. This trap provides
intercept support of an EFER write after it has been modified. The new
EFER value is provided in the VMCB EXITINFO1 field, allowing the
hypervisor to track the setting of the guest EFER.

Add support to track the value of the guest EFER value using the EFER
write trap so that the hypervisor understands the guest operating mode.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/uapi/asm/svm.h |  2 ++
 arch/x86/kvm/svm/svm.c  | 20 
 2 files changed, 22 insertions(+)

diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 09f723945425..6e3f92e17655 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -77,6 +77,7 @@
 #define SVM_EXIT_MWAIT_COND0x08c
 #define SVM_EXIT_XSETBV        0x08d
 #define SVM_EXIT_RDPRU 0x08e
+#define SVM_EXIT_EFER_WRITE_TRAP   0x08f
 #define SVM_EXIT_INVPCID   0x0a2
 #define SVM_EXIT_NPF   0x400
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI   0x401
@@ -184,6 +185,7 @@
{ SVM_EXIT_MONITOR, "monitor" }, \
{ SVM_EXIT_MWAIT,   "mwait" }, \
{ SVM_EXIT_XSETBV,  "xsetbv" }, \
+   { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \
{ SVM_EXIT_INVPCID, "invpcid" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index fa15223a2106..e16c1b49b34f 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2519,6 +2519,25 @@ static int cr8_write_interception(struct vcpu_svm *svm)
return 0;
 }
 
+static int efer_trap(struct vcpu_svm *svm)
+{
+   struct msr_data msr_info;
+   int ret;
+
+   /*
+* Clear the EFER_SVME bit from EFER. The SVM code always sets this
+* bit in svm_set_efer(), but __kvm_valid_efer() checks it against
+* whether the guest has X86_FEATURE_SVM - this avoids a failure if
+* the guest doesn't have X86_FEATURE_SVM.
+*/
+   msr_info.host_initiated = false;
+   msr_info.index = MSR_EFER;
+   msr_info.data = svm->vmcb->control.exit_info_1 & ~EFER_SVME;
+   ret = kvm_set_msr_common(&svm->vcpu, &msr_info);
+
+   return kvm_complete_insn_gp(&svm->vcpu, ret);
+}
+
 static int svm_get_msr_feature(struct kvm_msr_entry *msr)
 {
msr->data = 0;
@@ -3027,6 +3046,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_MWAIT]= mwait_interception,
[SVM_EXIT_XSETBV]   = xsetbv_interception,
[SVM_EXIT_RDPRU]= rdpru_interception,
+   [SVM_EXIT_EFER_WRITE_TRAP]  = efer_trap,
[SVM_EXIT_INVPCID]  = invpcid_interception,
[SVM_EXIT_NPF]  = npf_interception,
[SVM_EXIT_RSM]  = rsm_interception,
-- 
2.28.0



[PATCH v3 18/34] KVM: SVM: Support MMIO for an SEV-ES guest

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

For an SEV-ES guest, MMIO is performed to a shared (un-encrypted) page
so that both the hypervisor and guest can read or write to it and each
see the contents.

The GHCB specification provides software-defined VMGEXIT exit codes to
indicate a request for an MMIO read or an MMIO write. Add support to
recognize the MMIO requests and invoke SEV-ES specific routines that
can complete the MMIO operation. These routines use common KVM support
to complete the MMIO operation.
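
As an illustration of the guest side of this contract (not part of this
patch), an MMIO read request is placed in the GHCB roughly as sketched
below before issuing VMGEXIT. The helper name is made up for illustration
and assumes the ghcb_set_*() accessors generated for the GHCB save area;
the hypervisor then takes the MMIO GPA from SW_EXITINFO1, the length from
SW_EXITINFO2, and returns the data through the shared scratch buffer named
by the SW_SCRATCH field:

        /* Guest-side sketch of an MMIO read request via VMGEXIT. */
        static void ghcb_build_mmio_read(struct ghcb *ghcb, u64 mmio_gpa,
                                         u64 bytes, u64 scratch_gpa)
        {
                ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_MMIO_READ);
                ghcb_set_sw_exit_info_1(ghcb, mmio_gpa);  /* MMIO address */
                ghcb_set_sw_exit_info_2(ghcb, bytes);     /* transfer size */
                ghcb_set_sw_scratch(ghcb, scratch_gpa);   /* shared buffer GPA */
                /* The guest then executes VMGEXIT to hand control to KVM. */
        }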

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c | 124 +
 arch/x86/kvm/svm/svm.h |   6 ++
 arch/x86/kvm/x86.c | 123 
 arch/x86/kvm/x86.h |   5 ++
 4 files changed, 258 insertions(+)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 50081ce6b0a9..9dde60014f01 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1259,6 +1259,9 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
if (vcpu->arch.guest_state_protected)
sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
__free_page(virt_to_page(svm->vmsa));
+
+   if (svm->ghcb_sa_free)
+   kfree(svm->ghcb_sa);
 }
 
 static void dump_ghcb(struct vcpu_svm *svm)
@@ -1433,6 +1436,11 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
!ghcb_rcx_is_valid(ghcb))
goto vmgexit_err;
break;
+   case SVM_VMGEXIT_MMIO_READ:
+   case SVM_VMGEXIT_MMIO_WRITE:
+   if (!ghcb_sw_scratch_is_valid(ghcb))
+   goto vmgexit_err;
+   break;
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
break;
default:
@@ -1467,6 +1475,24 @@ static void pre_sev_es_run(struct vcpu_svm *svm)
if (!svm->ghcb)
return;
 
+   if (svm->ghcb_sa_free) {
+   /*
+* The scratch area lives outside the GHCB, so there is a
+* buffer that, depending on the operation performed, may
+* need to be synced, then freed.
+*/
+   if (svm->ghcb_sa_sync) {
+   kvm_write_guest(svm->vcpu.kvm,
+   ghcb_get_sw_scratch(svm->ghcb),
+   svm->ghcb_sa, svm->ghcb_sa_len);
+   svm->ghcb_sa_sync = false;
+   }
+
+   kfree(svm->ghcb_sa);
+   svm->ghcb_sa = NULL;
+   svm->ghcb_sa_free = false;
+   }
+
trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);
 
sev_es_sync_to_ghcb(svm);
@@ -1501,6 +1527,86 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
 }
 
+#define GHCB_SCRATCH_AREA_LIMIT    (16ULL * PAGE_SIZE)
+static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
+{
+   struct vmcb_control_area *control = &svm->vmcb->control;
+   struct ghcb *ghcb = svm->ghcb;
+   u64 ghcb_scratch_beg, ghcb_scratch_end;
+   u64 scratch_gpa_beg, scratch_gpa_end;
+   void *scratch_va;
+
+   scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
+   if (!scratch_gpa_beg) {
+   pr_err("vmgexit: scratch gpa not provided\n");
+   return false;
+   }
+
+   scratch_gpa_end = scratch_gpa_beg + len;
+   if (scratch_gpa_end < scratch_gpa_beg) {
+   pr_err("vmgexit: scratch length (%#llx) not valid for scratch 
address (%#llx)\n",
+  len, scratch_gpa_beg);
+   return false;
+   }
+
+   if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
+   /* Scratch area begins within GHCB */
+   ghcb_scratch_beg = control->ghcb_gpa +
+  offsetof(struct ghcb, shared_buffer);
+   ghcb_scratch_end = control->ghcb_gpa +
+  offsetof(struct ghcb, reserved_1);
+
+   /*
+* If the scratch area begins within the GHCB, it must be
+* completely contained in the GHCB shared buffer area.
+*/
+   if (scratch_gpa_beg < ghcb_scratch_beg ||
+   scratch_gpa_end > ghcb_scratch_end) {
+   pr_err("vmgexit: scratch area is outside of GHCB shared 
buffer area (%#llx - %#llx)\n",
+  scratch_gpa_beg, scratch_gpa_end);
+   return false;
+   }
+
+   scratch_va = (void *)svm->ghcb;
+   scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
+   } else {
+   /*
+* The guest memory must be read into a kernel buffer, so
+* limit the size
+*/
+   if (len > GH

[PATCH v3 13/34] KVM: SVM: Create trace events for VMGEXIT processing

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

Add trace events for entry to and exit from VMGEXIT processing. The vCPU
id and the exit reason will be common for the trace events. The exit info
fields will represent the input and output values for the entry and exit
events, respectively.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c |  6 +
 arch/x86/kvm/trace.h   | 53 ++
 arch/x86/kvm/x86.c |  2 ++
 3 files changed, 61 insertions(+)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 50afe9af4209..6128ac9dd5e2 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -14,11 +14,13 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "x86.h"
 #include "svm.h"
 #include "cpuid.h"
+#include "trace.h"
 
 static int sev_flush_asids(void);
 static DECLARE_RWSEM(sev_deactivate_lock);
@@ -1461,6 +1463,8 @@ static void pre_sev_es_run(struct vcpu_svm *svm)
if (!svm->ghcb)
return;
 
+   trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);
+
sev_es_sync_to_ghcb(svm);
 
kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
@@ -1525,6 +1529,8 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
svm->ghcb = svm->ghcb_map.hva;
ghcb = svm->ghcb_map.hva;
 
+   trace_kvm_vmgexit_enter(svm->vcpu.vcpu_id, ghcb);
+
exit_code = ghcb_get_sw_exit_code(ghcb);
 
ret = sev_es_validate_vmgexit(svm);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index aef960f90f26..7da931a511c9 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1578,6 +1578,59 @@ TRACE_EVENT(kvm_hv_syndbg_get_msr,
  __entry->vcpu_id, __entry->vp_index, __entry->msr,
  __entry->data)
 );
+
+/*
+ * Tracepoint for the start of VMGEXIT processing
+ */
+TRACE_EVENT(kvm_vmgexit_enter,
+   TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb),
+   TP_ARGS(vcpu_id, ghcb),
+
+   TP_STRUCT__entry(
+   __field(unsigned int, vcpu_id)
+   __field(u64, exit_reason)
+   __field(u64, info1)
+   __field(u64, info2)
+   ),
+
+   TP_fast_assign(
+   __entry->vcpu_id = vcpu_id;
+   __entry->exit_reason = ghcb->save.sw_exit_code;
+   __entry->info1   = ghcb->save.sw_exit_info_1;
+   __entry->info2   = ghcb->save.sw_exit_info_2;
+   ),
+
+   TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx",
+ __entry->vcpu_id, __entry->exit_reason,
+ __entry->info1, __entry->info2)
+);
+
+/*
+ * Tracepoint for the end of VMGEXIT processing
+ */
+TRACE_EVENT(kvm_vmgexit_exit,
+   TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb),
+   TP_ARGS(vcpu_id, ghcb),
+
+   TP_STRUCT__entry(
+   __field(unsigned int, vcpu_id)
+   __field(u64, exit_reason)
+   __field(u64, info1)
+   __field(u64, info2)
+   ),
+
+   TP_fast_assign(
+   __entry->vcpu_id = vcpu_id;
+   __entry->exit_reason = ghcb->save.sw_exit_code;
+   __entry->info1   = ghcb->save.sw_exit_info_1;
+   __entry->info2   = ghcb->save.sw_exit_info_2;
+   ),
+
+   TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx",
+ __entry->vcpu_id, __entry->exit_reason,
+ __entry->info1, __entry->info2)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 569fbdb4ee87..1f60e4ffbbda 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11288,3 +11288,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
-- 
2.28.0



[PATCH v3 14/34] KVM: SVM: Add support for SEV-ES GHCB MSR protocol function 0x002

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

The GHCB specification defines a GHCB MSR protocol using the lower
12-bits of the GHCB MSR (in the hypervisor this corresponds to the
GHCB GPA field in the VMCB).

Function 0x002 is a request to set the GHCB MSR value to the SEV INFO as
per the specification via the VMCB GHCB GPA field.
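
For reference, the layout of the SEV INFO response built by the
GHCB_MSR_SEV_INFO() macro added below is sketched here as a plain helper
(illustrative only, not part of the patch):

        /*
         * GHCB MSR value for the SEV INFO response (function 0x001):
         *   bits 63:48  maximum supported GHCB protocol version
         *   bits 47:32  minimum supported GHCB protocol version
         *   bits 31:24  encryption bit (C-bit) position reported to the guest
         *   bits 11:0   0x001 (GHCB_MSR_SEV_INFO_RESP)
         */
        static u64 sev_info_msr_value(u64 max, u64 min, u64 cbit)
        {
                return ((max  & 0xffffULL) << 48) |
                       ((min  & 0xffffULL) << 32) |
                       ((cbit & 0xffULL)   << 24) |
                       0x001ULL;
        }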

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c | 26 +-
 arch/x86/kvm/svm/svm.h | 17 +
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 6128ac9dd5e2..204ea9422af5 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -22,6 +22,7 @@
 #include "cpuid.h"
 #include "trace.h"
 
+static u8 sev_enc_bit;
 static int sev_flush_asids(void);
 static DECLARE_RWSEM(sev_deactivate_lock);
 static DEFINE_MUTEX(sev_bitmap_lock);
@@ -1142,6 +1143,9 @@ void __init sev_hardware_setup(void)
/* Retrieve SEV CPUID information */
cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
 
+   /* Set encryption bit location for SEV-ES guests */
+   sev_enc_bit = ebx & 0x3f;
+
/* Maximum number of encrypted guests supported simultaneously */
max_sev_asid = ecx;
 
@@ -1497,9 +1501,29 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
 }
 
+static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
+{
+   svm->vmcb->control.ghcb_gpa = value;
+}
+
 static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
 {
-   return -EINVAL;
+   struct vmcb_control_area *control = &svm->vmcb->control;
+   u64 ghcb_info;
+
+   ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;
+
+   switch (ghcb_info) {
+   case GHCB_MSR_SEV_INFO_REQ:
+   set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
+   GHCB_VERSION_MIN,
+   sev_enc_bit));
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   return 1;
 }
 
 int sev_handle_vmgexit(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 4ee217338d0b..b975c0819819 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -513,9 +513,26 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
 
 /* sev.c */
 
+#define GHCB_VERSION_MAX   1ULL
+#define GHCB_VERSION_MIN   1ULL
+
 #define GHCB_MSR_INFO_POS  0
 #define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1)
 
+#define GHCB_MSR_SEV_INFO_RESP 0x001
+#define GHCB_MSR_SEV_INFO_REQ  0x002
+#define GHCB_MSR_VER_MAX_POS   48
+#define GHCB_MSR_VER_MAX_MASK  0xffff
+#define GHCB_MSR_VER_MIN_POS   32
+#define GHCB_MSR_VER_MIN_MASK  0xffff
+#define GHCB_MSR_CBIT_POS  24
+#define GHCB_MSR_CBIT_MASK 0xff
+#define GHCB_MSR_SEV_INFO(_max, _min, _cbit)   \
+   ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) |   \
+(((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) |   \
+(((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) |\
+GHCB_MSR_SEV_INFO_RESP)
+
 extern unsigned int max_sev_asid;
 
 static inline bool svm_sev_enabled(void)
-- 
2.28.0



[PATCH v3 11/34] KVM: SVM: Prepare for SEV-ES exit handling in the sev.c file

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

This is a pre-patch to consolidate some exit handling code into callable
functions. Follow-on patches for SEV-ES exit handling will then be able
to use them from the sev.c file.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/svm.c | 64 +-
 1 file changed, 38 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f353039e54b6..602e20f38bdc 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3147,6 +3147,43 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
   "excp_to:", save->last_excp_to);
 }
 
+static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
+{
+   if (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
+   svm_exit_handlers[exit_code])
+   return 0;
+
+   vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code);
+   dump_vmcb(vcpu);
+   vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+   vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+   vcpu->run->internal.ndata = 2;
+   vcpu->run->internal.data[0] = exit_code;
+   vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
+
+   return -EINVAL;
+}
+
+static int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code)
+{
+   if (svm_handle_invalid_exit(&svm->vcpu, exit_code))
+   return 0;
+
+#ifdef CONFIG_RETPOLINE
+   if (exit_code == SVM_EXIT_MSR)
+   return msr_interception(svm);
+   else if (exit_code == SVM_EXIT_VINTR)
+   return interrupt_window_interception(svm);
+   else if (exit_code == SVM_EXIT_INTR)
+   return intr_interception(svm);
+   else if (exit_code == SVM_EXIT_HLT)
+   return halt_interception(svm);
+   else if (exit_code == SVM_EXIT_NPF)
+   return npf_interception(svm);
+#endif
+   return svm_exit_handlers[exit_code](svm);
+}
+
 static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
  u32 *intr_info, u32 *error_code)
 {
@@ -3213,32 +3250,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
if (exit_fastpath != EXIT_FASTPATH_NONE)
return 1;
 
-   if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
-   || !svm_exit_handlers[exit_code]) {
-   vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
-   dump_vmcb(vcpu);
-   vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-   vcpu->run->internal.suberror =
-   KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
-   vcpu->run->internal.ndata = 2;
-   vcpu->run->internal.data[0] = exit_code;
-   vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
-   return 0;
-   }
-
-#ifdef CONFIG_RETPOLINE
-   if (exit_code == SVM_EXIT_MSR)
-   return msr_interception(svm);
-   else if (exit_code == SVM_EXIT_VINTR)
-   return interrupt_window_interception(svm);
-   else if (exit_code == SVM_EXIT_INTR)
-   return intr_interception(svm);
-   else if (exit_code == SVM_EXIT_HLT)
-   return halt_interception(svm);
-   else if (exit_code == SVM_EXIT_NPF)
-   return npf_interception(svm);
-#endif
-   return svm_exit_handlers[exit_code](svm);
+   return svm_invoke_exit_handler(svm, exit_code);
 }
 
 static void reload_tss(struct kvm_vcpu *vcpu)
-- 
2.28.0



[PATCH v3 15/34] KVM: SVM: Add support for SEV-ES GHCB MSR protocol function 0x004

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

The GHCB specification defines a GHCB MSR protocol using the lower
12-bits of the GHCB MSR (in the hypervisor this corresponds to the
GHCB GPA field in the VMCB).

Function 0x004 is a request for CPUID information. Only a single CPUID
result register can be sent per invocation, so the protocol defines the
register that is requested. The GHCB MSR value is set to the CPUID
register value as per the specification via the VMCB GHCB GPA field.
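
To make the register layout concrete, a request and its response look
roughly like the helpers below, based on the GHCB_MSR_CPUID_* definitions
added in this patch (the helper names are illustrative and not part of the
patch):

        /*
         * Request (function 0x004): CPUID function in bits 63:32, requested
         * register in bits 31:30 (0 = EAX, 1 = EBX, 2 = ECX, 3 = EDX).
         */
        static u64 ghcb_msr_cpuid_req(u32 fn, u32 reg)
        {
                return ((u64)fn << 32) | (((u64)reg & 0x3) << 30) | 0x004ULL;
        }

        /* Response (function 0x005): requested register value in bits 63:32. */
        static u64 ghcb_msr_cpuid_resp(u32 value)
        {
                return ((u64)value << 32) | 0x005ULL;
        }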

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c | 56 --
 arch/x86/kvm/svm/svm.h |  9 +++
 2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 204ea9422af5..24df6f784a2e 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1501,6 +1501,18 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
 }
 
+static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
+ unsigned int pos)
+{
+   svm->vmcb->control.ghcb_gpa &= ~(mask << pos);
+   svm->vmcb->control.ghcb_gpa |= (value & mask) << pos;
+}
+
+static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos)
+{
+   return (svm->vmcb->control.ghcb_gpa >> pos) & mask;
+}
+
 static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
 {
svm->vmcb->control.ghcb_gpa = value;
@@ -1509,7 +1521,9 @@ static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
 static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
 {
struct vmcb_control_area *control = &svm->vmcb->control;
+   struct kvm_vcpu *vcpu = &svm->vcpu;
u64 ghcb_info;
+   int ret = 1;
 
ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;
 
@@ -1519,11 +1533,49 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
GHCB_VERSION_MIN,
sev_enc_bit));
break;
+   case GHCB_MSR_CPUID_REQ: {
+   u64 cpuid_fn, cpuid_reg, cpuid_value;
+
+   cpuid_fn = get_ghcb_msr_bits(svm,
+GHCB_MSR_CPUID_FUNC_MASK,
+GHCB_MSR_CPUID_FUNC_POS);
+
+   /* Initialize the registers needed by the CPUID intercept */
+   vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
+   vcpu->arch.regs[VCPU_REGS_RCX] = 0;
+
+   ret = svm_invoke_exit_handler(svm, SVM_EXIT_CPUID);
+   if (!ret) {
+   ret = -EINVAL;
+   break;
+   }
+
+   cpuid_reg = get_ghcb_msr_bits(svm,
+ GHCB_MSR_CPUID_REG_MASK,
+ GHCB_MSR_CPUID_REG_POS);
+   if (cpuid_reg == 0)
+   cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX];
+   else if (cpuid_reg == 1)
+   cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX];
+   else if (cpuid_reg == 2)
+   cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX];
+   else
+   cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX];
+
+   set_ghcb_msr_bits(svm, cpuid_value,
+ GHCB_MSR_CPUID_VALUE_MASK,
+ GHCB_MSR_CPUID_VALUE_POS);
+
+   set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP,
+ GHCB_MSR_INFO_MASK,
+ GHCB_MSR_INFO_POS);
+   break;
+   }
default:
-   return -EINVAL;
+   ret = -EINVAL;
}
 
-   return 1;
+   return ret;
 }
 
 int sev_handle_vmgexit(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index b975c0819819..0df18bdef4ef 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -533,6 +533,15 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
 (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) |\
 GHCB_MSR_SEV_INFO_RESP)
 
+#define GHCB_MSR_CPUID_REQ 0x004
+#define GHCB_MSR_CPUID_RESP    0x005
+#define GHCB_MSR_CPUID_FUNC_POS    32
+#define GHCB_MSR_CPUID_FUNC_MASK   0xffffffff
+#define GHCB_MSR_CPUID_VALUE_POS   32
+#define GHCB_MSR_CPUID_VALUE_MASK  0xffffffff
+#define GHCB_MSR_CPUID_REG_POS 30
+#define GHCB_MSR_CPUID_REG_MASK0x3
+
 extern unsigned int max_sev_asid;
 
 static inline bool svm_sev_enabled(void)
-- 
2.28.0



[PATCH v3 17/34] KVM: SVM: Create trace events for VMGEXIT MSR protocol processing

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

Add trace events for entry to and exit from VMGEXIT MSR protocol
processing. The vCPU id will be common to both trace events. The MSR
protocol processing is guided by the GHCB GPA in the VMCB, so the GHCB
GPA will represent the input and output values for the entry and exit
events, respectively. Additionally, the exit event will contain the
return code for the event.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c |  6 ++
 arch/x86/kvm/trace.h   | 44 ++
 arch/x86/kvm/x86.c |  2 ++
 3 files changed, 52 insertions(+)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index de8501264c1c..50081ce6b0a9 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1527,6 +1527,9 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
 
ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;
 
+   trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id,
+control->ghcb_gpa);
+
switch (ghcb_info) {
case GHCB_MSR_SEV_INFO_REQ:
set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
@@ -1588,6 +1591,9 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
ret = -EINVAL;
}
 
+   trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,
+   control->ghcb_gpa, ret);
+
return ret;
 }
 
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 7da931a511c9..2de30c20bc26 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1631,6 +1631,50 @@ TRACE_EVENT(kvm_vmgexit_exit,
  __entry->info1, __entry->info2)
 );
 
+/*
+ * Tracepoint for the start of VMGEXIT MSR protocol processing
+ */
+TRACE_EVENT(kvm_vmgexit_msr_protocol_enter,
+   TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa),
+   TP_ARGS(vcpu_id, ghcb_gpa),
+
+   TP_STRUCT__entry(
+   __field(unsigned int, vcpu_id)
+   __field(u64, ghcb_gpa)
+   ),
+
+   TP_fast_assign(
+   __entry->vcpu_id  = vcpu_id;
+   __entry->ghcb_gpa = ghcb_gpa;
+   ),
+
+   TP_printk("vcpu %u, ghcb_gpa %016llx",
+ __entry->vcpu_id, __entry->ghcb_gpa)
+);
+
+/*
+ * Tracepoint for the end of VMGEXIT MSR protocol processing
+ */
+TRACE_EVENT(kvm_vmgexit_msr_protocol_exit,
+   TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa, int result),
+   TP_ARGS(vcpu_id, ghcb_gpa, result),
+
+   TP_STRUCT__entry(
+   __field(unsigned int, vcpu_id)
+   __field(u64, ghcb_gpa)
+   __field(int, result)
+   ),
+
+   TP_fast_assign(
+   __entry->vcpu_id  = vcpu_id;
+   __entry->ghcb_gpa = ghcb_gpa;
+   __entry->result   = result;
+   ),
+
+   TP_printk("vcpu %u, ghcb_gpa %016llx, result %d",
+ __entry->vcpu_id, __entry->ghcb_gpa, __entry->result)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1f60e4ffbbda..7b707a638438 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11290,3 +11290,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
-- 
2.28.0



[PATCH v3 16/34] KVM: SVM: Add support for SEV-ES GHCB MSR protocol function 0x100

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

The GHCB specification defines a GHCB MSR protocol using the lower
12-bits of the GHCB MSR (in the hypervisor this corresponds to the
GHCB GPA field in the VMCB).

Function 0x100 is a request for termination of the guest. The guest has
encountered some situation for which it has requested to be terminated.
The GHCB MSR value contains the reason for the request.
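
For reference, the guest encodes the request roughly as the helper below,
based on the GHCB_MSR_TERM_* definitions added in this patch (the helper
name is illustrative and not part of the patch):

        /*
         * Termination request (function 0x100): reason code set in bits
         * 15:12, reason code in bits 23:16.
         */
        static u64 ghcb_msr_term_req(u64 reason_set, u64 reason_code)
        {
                return ((reason_code & 0xff) << 16) |
                       ((reason_set  & 0xf)  << 12) |
                       0x100ULL;
        }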

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c | 13 +
 arch/x86/kvm/svm/svm.h |  6 ++
 2 files changed, 19 insertions(+)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 24df6f784a2e..de8501264c1c 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1571,6 +1571,19 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
  GHCB_MSR_INFO_POS);
break;
}
+   case GHCB_MSR_TERM_REQ: {
+   u64 reason_set, reason_code;
+
+   reason_set = get_ghcb_msr_bits(svm,
+  GHCB_MSR_TERM_REASON_SET_MASK,
+  GHCB_MSR_TERM_REASON_SET_POS);
+   reason_code = get_ghcb_msr_bits(svm,
+   GHCB_MSR_TERM_REASON_MASK,
+   GHCB_MSR_TERM_REASON_POS);
+   pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
+   reason_set, reason_code);
+   fallthrough;
+   }
default:
ret = -EINVAL;
}
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 0df18bdef4ef..7e3f8e3e0722 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -542,6 +542,12 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
 #define GHCB_MSR_CPUID_REG_POS 30
 #define GHCB_MSR_CPUID_REG_MASK0x3
 
+#define GHCB_MSR_TERM_REQ  0x100
+#define GHCB_MSR_TERM_REASON_SET_POS   12
+#define GHCB_MSR_TERM_REASON_SET_MASK  0xf
+#define GHCB_MSR_TERM_REASON_POS   16
+#define GHCB_MSR_TERM_REASON_MASK  0xff
+
 extern unsigned int max_sev_asid;
 
 static inline bool svm_sev_enabled(void)
-- 
2.28.0



[PATCH v3 09/34] KVM: SVM: Do not allow instruction emulation under SEV-ES

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

When a guest is running as an SEV-ES guest, it is not possible to emulate
instructions. Add support to prevent instruction emulation.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/svm.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7f805cd5bbe7..0e5f83912b56 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4195,6 +4195,12 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i
bool smep, smap, is_user;
unsigned long cr4;
 
+   /*
+* When the guest is an SEV-ES guest, emulation is not possible.
+*/
+   if (sev_es_guest(vcpu->kvm))
+   return false;
+
/*
 * Detect and workaround Errata 1096 Fam_17h_00_0Fh.
 *
-- 
2.28.0



[PATCH v3 12/34] KVM: SVM: Add initial support for a VMGEXIT VMEXIT

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

SEV-ES adds a new VMEXIT reason code, VMGEXIT. Initial support for a
VMGEXIT includes mapping the GHCB based on the guest GPA, which is
obtained from a new VMCB field, and then validating the required inputs
for the VMGEXIT exit reason.

Since many of the VMGEXIT exit reasons correspond to existing VMEXIT
reasons, the information from the GHCB is copied into the VMCB control
exit code areas and KVM register areas. The standard exit handlers are
invoked, similar to standard VMEXIT processing. Before restarting the
vCPU, the GHCB is updated with any registers that have been updated by
the hypervisor.
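
A heavily condensed sketch of that flow is shown below; error handling and
the dispatch of the software-defined VMGEXIT events are omitted, and the
function name is changed to make clear this is not the full implementation
from the diff:

        static int sev_handle_vmgexit_sketch(struct vcpu_svm *svm)
        {
                u64 exit_code;
                int ret;

                /* Map the GHCB using the guest GPA from the new VMCB field */
                if (kvm_vcpu_map(&svm->vcpu,
                                 svm->vmcb->control.ghcb_gpa >> PAGE_SHIFT,
                                 &svm->ghcb_map))
                        return -EINVAL;
                svm->ghcb = svm->ghcb_map.hva;

                /* Validate the required GHCB inputs for this exit reason */
                ret = sev_es_validate_vmgexit(svm);
                if (ret)
                        return ret;

                /* Copy GHCB state into the VMCB control and KVM register areas */
                sev_es_sync_from_ghcb(svm);

                /* Invoke the standard exit handler for the requested exit code */
                exit_code = ghcb_get_sw_exit_code(svm->ghcb);
                return svm_invoke_exit_handler(svm, exit_code);
        }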

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/svm.h  |   2 +-
 arch/x86/include/uapi/asm/svm.h |   7 +
 arch/x86/kvm/cpuid.c|   1 +
 arch/x86/kvm/svm/sev.c  | 271 
 arch/x86/kvm/svm/svm.c  |   8 +-
 arch/x86/kvm/svm/svm.h  |   8 +
 6 files changed, 294 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index bce28482d63d..caa8628f5fba 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -130,7 +130,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u32 exit_int_info_err;
u64 nested_ctl;
u64 avic_vapic_bar;
-   u8 reserved_4[8];
+   u64 ghcb_gpa;
u32 event_inj;
u32 event_inj_err;
u64 nested_cr3;
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index f1d8307454e0..09f723945425 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -81,6 +81,7 @@
 #define SVM_EXIT_NPF   0x400
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI   0x401
 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
+#define SVM_EXIT_VMGEXIT   0x403
 
 /* SEV-ES software-defined VMGEXIT events */
 #define SVM_VMGEXIT_MMIO_READ  0x80000001
@@ -187,6 +188,12 @@
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS,   "avic_unaccelerated_access" }, \
+   { SVM_EXIT_VMGEXIT, "vmgexit" }, \
+   { SVM_VMGEXIT_MMIO_READ,"vmgexit_mmio_read" }, \
+   { SVM_VMGEXIT_MMIO_WRITE,   "vmgexit_mmio_write" }, \
+   { SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \
+   { SVM_VMGEXIT_AP_HLT_LOOP,  "vmgexit_ap_hlt_loop" }, \
+   { SVM_VMGEXIT_AP_JUMP_TABLE,"vmgexit_ap_jump_table" }, \
{ SVM_EXIT_ERR, "invalid_guest_state" }
 
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index d50041f570e8..0f6ecbb5e5b0 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -146,6 +146,7 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
   MSR_IA32_MISC_ENABLE_MWAIT);
}
 }
+EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
 
 static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 151e9eab85a9..50afe9af4209 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -18,6 +18,7 @@
 
 #include "x86.h"
 #include "svm.h"
+#include "cpuid.h"
 
 static int sev_flush_asids(void);
 static DECLARE_RWSEM(sev_deactivate_lock);
@@ -1254,11 +1255,226 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
__free_page(virt_to_page(svm->vmsa));
 }
 
+static void dump_ghcb(struct vcpu_svm *svm)
+{
+   struct ghcb *ghcb = svm->ghcb;
+   unsigned int nbits;
+
+   /* Re-use the dump_invalid_vmcb module parameter */
+   if (!dump_invalid_vmcb) {
+   pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump 
internal KVM state.\n");
+   return;
+   }
+
+   nbits = sizeof(ghcb->save.valid_bitmap) * 8;
+
+   pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
+   pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
+  ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
+   pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
+  ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
+   pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
+  ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
+   pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
+  ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
+   pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
+}
+
+static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
+{
+   struct kvm_vcpu *vcpu = &svm->vcpu;
+   struct g

[PATCH v3 08/34] KVM: SVM: Prevent debugging under SEV-ES

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

Since the guest register state of an SEV-ES guest is encrypted, debugging
is not supported. Update the code to prevent guest debugging when the
guest has protected state.

Additionally, an SEV-ES guest must always intercept DR7 reads and writes,
and must not intercept any other debug register access. Update
set_dr_intercepts() and clr_dr_intercepts() to account for this.
this.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/svm.c |  9 +
 arch/x86/kvm/svm/svm.h | 81 +++---
 arch/x86/kvm/x86.c |  3 ++
 3 files changed, 57 insertions(+), 36 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9a3d57ed997f..7f805cd5bbe7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1802,6 +1802,9 @@ static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value)
 {
struct vmcb *vmcb = svm->vmcb;
 
+   if (svm->vcpu.arch.guest_state_protected)
+   return;
+
if (unlikely(value != vmcb->save.dr6)) {
vmcb->save.dr6 = value;
vmcb_mark_dirty(vmcb, VMCB_DR);
@@ -1812,6 +1815,9 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
 {
struct vcpu_svm *svm = to_svm(vcpu);
 
+   if (vcpu->arch.guest_state_protected)
+   return;
+
get_debugreg(vcpu->arch.db[0], 0);
get_debugreg(vcpu->arch.db[1], 1);
get_debugreg(vcpu->arch.db[2], 2);
@@ -1830,6 +1836,9 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
 {
struct vcpu_svm *svm = to_svm(vcpu);
 
+   if (vcpu->arch.guest_state_protected)
+   return;
+
svm->vmcb->save.dr7 = value;
vmcb_mark_dirty(svm->vmcb, VMCB_DR);
 }
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 8f0a3ed0d790..66ea889f71ed 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -198,6 +198,28 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
return container_of(kvm, struct kvm_svm, kvm);
 }
 
+static inline bool sev_guest(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_AMD_SEV
+   struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+   return sev->active;
+#else
+   return false;
+#endif
+}
+
+static inline bool sev_es_guest(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_AMD_SEV
+   struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+   return sev_guest(kvm) && sev->es_active;
+#else
+   return false;
+#endif
+}
+
 static inline void vmcb_mark_all_dirty(struct vmcb *vmcb)
 {
vmcb->control.clean = 0;
@@ -249,21 +271,24 @@ static inline void set_dr_intercepts(struct vcpu_svm *svm)
 {
struct vmcb *vmcb = get_host_vmcb(svm);
 
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
+   if (!sev_es_guest(svm->vcpu.kvm)) {
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
+   }
+
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRI

[PATCH v3 07/34] KVM: SVM: Add required changes to support intercepts under SEV-ES

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

When a guest is running under SEV-ES, the hypervisor cannot access the
guest register state. There are numerous places in the KVM code where
certain registers are accessed that are not allowed to be accessed (e.g.
RIP, CR0, etc). Add checks to prevent register accesses and add intercept
update support at various points within the KVM code.

Also, when handling a VMGEXIT, exceptions are passed back through the
GHCB. Since the RDMSR/WRMSR intercepts may inject a #GP on error,
update the SVM intercept handlers to report such errors through the GHCB
for SEV-ES guests.
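
For illustration only (this mirrors the rdmsr_interception() hunk below and
is not an additional patch hunk), a minimal sketch of how an MSR access
error is reported back to an SEV-ES guest through the GHCB instead of by
injecting the #GP directly:

	/* Sketch: flag the error and describe the event for the guest #VC handler */
	ghcb_set_sw_exit_info_1(svm->ghcb, 1);		/* non-zero => error */
	ghcb_set_sw_exit_info_2(svm->ghcb,
				X86_TRAP_GP |
				SVM_EVTINJ_TYPE_EXEPT |
				SVM_EVTINJ_VALID);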

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/svm.h |   3 +-
 arch/x86/kvm/svm/svm.c | 111 +
 arch/x86/kvm/x86.c |   6 +-
 3 files changed, 107 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1edf24f51b53..bce28482d63d 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -178,7 +178,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define LBR_CTL_ENABLE_MASK BIT_ULL(0)
 #define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
 
-#define SVM_INTERRUPT_SHADOW_MASK 1
+#define SVM_INTERRUPT_SHADOW_MASK  BIT_ULL(0)
+#define SVM_GUEST_INTERRUPT_MASK   BIT_ULL(1)
 
 #define SVM_IOIO_STR_SHIFT 2
 #define SVM_IOIO_REP_SHIFT 3
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d45b2dc5cabe..9a3d57ed997f 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include "trace.h"
@@ -340,6 +341,13 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
struct vcpu_svm *svm = to_svm(vcpu);
 
+   /*
+* SEV-ES does not expose the next RIP. The RIP update is controlled by
+* the type of exit and the #VC handler in the guest.
+*/
+   if (sev_es_guest(vcpu->kvm))
+   goto done;
+
if (nrips && svm->vmcb->control.next_rip != 0) {
WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
svm->next_rip = svm->vmcb->control.next_rip;
@@ -351,6 +359,8 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
} else {
kvm_rip_write(vcpu, svm->next_rip);
}
+
+done:
svm_set_interrupt_shadow(vcpu, 0);
 
return 1;
@@ -1651,9 +1661,18 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
 
 static void update_cr0_intercept(struct vcpu_svm *svm)
 {
-   ulong gcr0 = svm->vcpu.arch.cr0;
-   u64 *hcr0 = &svm->vmcb->save.cr0;
+   ulong gcr0;
+   u64 *hcr0;
+
+   /*
+* SEV-ES guests must always keep the CR intercepts cleared. CR
+* tracking is done using the CR write traps.
+*/
+   if (sev_es_guest(svm->vcpu.kvm))
+   return;
 
+   gcr0 = svm->vcpu.arch.cr0;
+   hcr0 = &svm->vmcb->save.cr0;
*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
| (gcr0 & SVM_CR0_SELECTIVE_MASK);
 
@@ -1673,7 +1692,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
struct vcpu_svm *svm = to_svm(vcpu);
 
 #ifdef CONFIG_X86_64
-   if (vcpu->arch.efer & EFER_LME) {
+   if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
vcpu->arch.efer |= EFER_LMA;
svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
@@ -2604,7 +2623,29 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 static int rdmsr_interception(struct vcpu_svm *svm)
 {
-   return kvm_emulate_rdmsr(&svm->vcpu);
+   u32 ecx;
+   u64 data;
+
+   if (!sev_es_guest(svm->vcpu.kvm))
+   return kvm_emulate_rdmsr(&svm->vcpu);
+
+   ecx = kvm_rcx_read(&svm->vcpu);
+   if (kvm_get_msr(&svm->vcpu, ecx, &data)) {
+   trace_kvm_msr_read_ex(ecx);
+   ghcb_set_sw_exit_info_1(svm->ghcb, 1);
+   ghcb_set_sw_exit_info_2(svm->ghcb,
+   X86_TRAP_GP |
+   SVM_EVTINJ_TYPE_EXEPT |
+   SVM_EVTINJ_VALID);
+   return 1;
+   }
+
+   trace_kvm_msr_read(ecx, data);
+
+   kvm_rax_write(&svm->vcpu, data & -1u);
+   kvm_rdx_write(&svm->vcpu, (data >> 32) & -1u);
+
+   return kvm_skip_emulated_instruction(&svm->vcpu);
 }
 
 static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
@@ -2793,7 +2834,27 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 
 static int wrmsr_interception(struct vcpu_svm *svm)
 {
-   return kvm_emulate_wrmsr(&svm->vcpu);
+   u32 ecx;
+   u64 data;
+
+

[PATCH v3 06/34] KVM: x86: Mark GPRs dirty when written

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

When performing VMGEXIT processing for an SEV-ES guest, register values
will be synced between KVM and the GHCB. Prepare for detecting when a GPR
has been updated (marked dirty) in order to determine whether to sync the
register to the GHCB.
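
As a hedged sketch of the intended use (the actual sync helper is added
later in the series and may differ in detail), the dirty bit lets the
VMGEXIT return path copy back only the registers KVM actually wrote:

	/* Sketch: write a register back to the GHCB only if KVM marked it dirty */
	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX))
		ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RBX))
		ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);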

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/kvm_cache_regs.h | 51 ++-
 1 file changed, 26 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index a889563ad02d..f15bc16de07c 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -9,6 +9,31 @@
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR  \
 | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
 
+static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
+enum kvm_reg reg)
+{
+   return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+}
+
+static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
+enum kvm_reg reg)
+{
+   return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
+static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
+  enum kvm_reg reg)
+{
+   __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+}
+
+static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
+  enum kvm_reg reg)
+{
+   __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+   __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
 #define BUILD_KVM_GPR_ACCESSORS(lname, uname)\
 static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
 {\
@@ -18,6 +43,7 @@ static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu,  \
unsigned long val)\
 {\
vcpu->arch.regs[VCPU_REGS_##uname] = val; \
+   kvm_register_mark_dirty(vcpu, VCPU_REGS_##uname); \
 }
 BUILD_KVM_GPR_ACCESSORS(rax, RAX)
 BUILD_KVM_GPR_ACCESSORS(rbx, RBX)
@@ -37,31 +63,6 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14)
 BUILD_KVM_GPR_ACCESSORS(r15, R15)
 #endif
 
-static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
-enum kvm_reg reg)
-{
-   return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
-}
-
-static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
-enum kvm_reg reg)
-{
-   return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
-}
-
-static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
-  enum kvm_reg reg)
-{
-   __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
-}
-
-static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
-  enum kvm_reg reg)
-{
-   __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
-   __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
-}
-
 static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg)
 {
if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
-- 
2.28.0



[PATCH v3 10/34] KVM: SVM: Cannot re-initialize the VMCB after shutdown with SEV-ES

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

When a SHUTDOWN VMEXIT is encountered, normally the VMCB is re-initialized
so that the guest can be re-launched. But when a guest is running as an
SEV-ES guest, the VMSA cannot be re-initialized because it has been
encrypted. For now, just return -EINVAL to prevent a possible attempt at
a guest reset.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/svm.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 0e5f83912b56..f353039e54b6 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2026,6 +2026,13 @@ static int shutdown_interception(struct vcpu_svm *svm)
 {
struct kvm_run *kvm_run = svm->vcpu.run;
 
+   /*
+* The VM save area has already been encrypted so it
+* cannot be reinitialized - just terminate.
+*/
+   if (sev_es_guest(svm->vcpu.kvm))
+   return -EINVAL;
+
/*
 * VMCB is undefined after a SHUTDOWN intercept
 * so reinitialize it.
-- 
2.28.0



[PATCH v3 05/34] KVM: SVM: Add support for the SEV-ES VMSA

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

Allocate a page during vCPU creation to be used as the encrypted VM save
area (VMSA) for the SEV-ES guest. Provide a flag in the kvm_vcpu_arch
structure that indicates whether the guest state is protected.

When freeing a VMSA page that has been encrypted, the cache contents must
be flushed using the VM Page Flush MSR (MSR_AMD64_VM_PAGE_FLUSH) before
freeing the page.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/kvm_host.h |  3 ++
 arch/x86/kvm/svm/sev.c  | 64 +
 arch/x86/kvm/svm/svm.c  | 24 +++--
 arch/x86/kvm/svm/svm.h  |  5 +++
 4 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d44858b69353..7776bb18e29d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -804,6 +804,9 @@ struct kvm_vcpu_arch {
 */
bool enforce;
} pv_cpuid;
+
+   /* Protected Guests */
+   bool guest_state_protected;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 9bf5e9dadff5..151e9eab85a9 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "x86.h"
 #include "svm.h"
@@ -1190,6 +1191,69 @@ void sev_hardware_teardown(void)
sev_flush_asids();
 }
 
+/*
+ * Pages used by hardware to hold guest encrypted state must be flushed before
+ * returning them to the system.
+ */
+void sev_flush_guest_memory(struct vcpu_svm *svm, void *va, unsigned long len)
+{
+   /*
+* If hardware enforced cache coherency for encrypted mappings of the
+* same physical page is supported, nothing to do.
+*/
+   if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
+   return;
+
+   /*
+* If the VM Page Flush MSR is supported, use it to flush the page
+* (using the page virtual address and the guest ASID).
+*/
+   if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
+   struct kvm_sev_info *sev;
+   u64 start, stop;
+
+   /* Align start and stop to page boundaries. */
+   start = (u64)va & PAGE_MASK;
+   stop = PAGE_ALIGN((u64)va + len);
+
+   if (start < stop) {
+   sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+
+   while (start < stop) {
+   wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
+  start | sev->asid);
+
+   start += PAGE_SIZE;
+   }
+
+   return;
+   } else {
+   WARN(1, "Address overflow, using WBINVD\n");
+   }
+   }
+
+   /*
+* Hardware should always have one of the above features,
+* but if not, use WBINVD and issue a warning.
+*/
+   WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
+   wbinvd_on_all_cpus();
+}
+
+void sev_free_vcpu(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_svm *svm;
+
+   if (!sev_es_guest(vcpu->kvm))
+   return;
+
+   svm = to_svm(vcpu);
+
+   if (vcpu->arch.guest_state_protected)
+   sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
+   __free_page(virt_to_page(svm->vmsa));
+}
+
 void pre_sev_run(struct vcpu_svm *svm, int cpu)
 {
struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index a3198b65f431..d45b2dc5cabe 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1288,6 +1288,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 {
struct vcpu_svm *svm;
struct page *vmcb_page;
+   struct page *vmsa_page = NULL;
int err;
 
BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
@@ -1298,9 +1299,19 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
if (!vmcb_page)
goto out;
 
+   if (sev_es_guest(svm->vcpu.kvm)) {
+   /*
+* SEV-ES guests require a separate VMSA page used to contain
+* the encrypted register state of the guest.
+*/
+   vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+   if (!vmsa_page)
+   goto error_free_vmcb_page;
+   }
+
err = avic_init_vcpu(svm);
if (err)
-   goto error_free_vmcb_page;
+   goto error_free_vmsa_page;
 
/* We initialize this flag to true to make sure that the is_running
 * bit would be set the first time the vcpu is loaded.
@@ -1310,12 +1321,16 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 
svm->msrpm = svm_vcpu_alloc_msrpm();
if (!svm->msrpm)
-   goto error_free_vmcb_page

[PATCH v3 03/34] KVM: SVM: Add support for SEV-ES capability in KVM

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

Add support to KVM for determining if a system is capable of supporting
SEV-ES as well as determining if a guest is an SEV-ES guest.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/Kconfig   |  3 ++-
 arch/x86/kvm/svm/sev.c | 47 ++
 arch/x86/kvm/svm/svm.c | 20 +-
 arch/x86/kvm/svm/svm.h | 17 ++-
 4 files changed, 66 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index f92dfd8ef10d..7ac592664c52 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -100,7 +100,8 @@ config KVM_AMD_SEV
depends on KVM_AMD && X86_64
depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
help
-   Provides support for launching Encrypted VMs on AMD processors.
+ Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
+ with Encrypted State (SEV-ES) on AMD processors.
 
 config KVM_MMU_AUDIT
bool "Audit KVM MMU"
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index a4ba5476bf42..9bf5e9dadff5 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -932,7 +932,7 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
struct kvm_sev_cmd sev_cmd;
int r;
 
-   if (!svm_sev_enabled())
+   if (!svm_sev_enabled() || !sev)
return -ENOTTY;
 
if (!argp)
@@ -1125,29 +1125,58 @@ void sev_vm_destroy(struct kvm *kvm)
sev_asid_free(sev->asid);
 }
 
-int __init sev_hardware_setup(void)
+void __init sev_hardware_setup(void)
 {
+   unsigned int eax, ebx, ecx, edx;
+   bool sev_es_supported = false;
+   bool sev_supported = false;
+
+   /* Does the CPU support SEV? */
+   if (!boot_cpu_has(X86_FEATURE_SEV))
+   goto out;
+
+   /* Retrieve SEV CPUID information */
+   cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
+
/* Maximum number of encrypted guests supported simultaneously */
-   max_sev_asid = cpuid_ecx(0x8000001F);
+   max_sev_asid = ecx;
 
if (!svm_sev_enabled())
-   return 1;
+   goto out;
 
/* Minimum ASID value that should be used for SEV guest */
-   min_sev_asid = cpuid_edx(0x8000001F);
+   min_sev_asid = edx;
 
/* Initialize SEV ASID bitmaps */
sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
if (!sev_asid_bitmap)
-   return 1;
+   goto out;
 
sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
if (!sev_reclaim_asid_bitmap)
-   return 1;
+   goto out;
 
-   pr_info("SEV supported\n");
+   pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1);
+   sev_supported = true;
 
-   return 0;
+   /* SEV-ES support requested? */
+   if (!sev_es)
+   goto out;
+
+   /* Does the CPU support SEV-ES? */
+   if (!boot_cpu_has(X86_FEATURE_SEV_ES))
+   goto out;
+
+   /* Has the system been allocated ASIDs for SEV-ES? */
+   if (min_sev_asid == 1)
+   goto out;
+
+   pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1);
+   sev_es_supported = true;
+
+out:
+   sev = sev_supported;
+   sev_es = sev_es_supported;
 }
 
 void sev_hardware_teardown(void)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2f32fd09e259..a3198b65f431 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -187,9 +187,13 @@ static int vgif = true;
 module_param(vgif, int, 0444);
 
 /* enable/disable SEV support */
-static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
 module_param(sev, int, 0444);
 
+/* enable/disable SEV-ES support */
+int sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+module_param(sev_es, int, 0444);
+
 static bool __read_mostly dump_invalid_vmcb = 0;
 module_param(dump_invalid_vmcb, bool, 0644);
 
@@ -959,15 +963,11 @@ static __init int svm_hardware_setup(void)
kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
}
 
-   if (sev) {
-   if (boot_cpu_has(X86_FEATURE_SEV) &&
-   IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
-   r = sev_hardware_setup();
-   if (r)
-   sev = false;
-   } else {
-   sev = false;
-   }
+   if (IS_ENABLED(CONFIG_KVM_AMD_SEV) && sev) {
+   sev_hardware_setup();
+   } else {
+   sev = false;
+   sev_es = false;
}
 
svm_adjust_mmio_mask();
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 1d853fe4c778..af9e5910817c 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm

[PATCH v3 01/34] x86/cpu: Add VM page flush MSR availability as a CPUID feature

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

On systems that do not have hardware enforced cache coherency between
encrypted and unencrypted mappings of the same physical page, the
hypervisor can use the VM page flush MSR (0xc001011e) to flush the cache
contents of an SEV guest page. When a small number of pages are being
flushed, this can be used in place of issuing a WBINVD across all CPUs.

CPUID 0x8000_001f_eax[2] is used to determine if the VM page flush MSR is
available. Add a CPUID feature to indicate it is supported and define the
MSR.
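
For reference, a minimal usage sketch (the KVM flush loop that actually
uses this is added later in the series; page_va and asid are placeholder
names here):

	/* Sketch: flush one guest page, keyed by its virtual address and the guest ASID */
	if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH))
		wrmsrl(MSR_AMD64_VM_PAGE_FLUSH, page_va | asid);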

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 arch/x86/include/asm/msr-index.h   | 1 +
 arch/x86/kernel/cpu/scattered.c| 1 +
 3 files changed, 3 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index dad350d42ecf..54df367b3180 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -237,6 +237,7 @@
 #define X86_FEATURE_VMCALL		( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
 #define X86_FEATURE_VMW_VMMCALL	( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
 #define X86_FEATURE_SEV_ES		( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_VM_PAGE_FLUSH	( 8*32+21) /* "" VM Page Flush MSR is supported */
 
 /* Intel-defined CPU features, CPUID level 0x0007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE		( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 972a34d93505..abfc9b0fbd8d 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -470,6 +470,7 @@
 #define MSR_AMD64_ICIBSEXTDCTL 0xc001103c
 #define MSR_AMD64_IBSOPDATA4   0xc001103d
 #define MSR_AMD64_IBS_REG_COUNT_MAX8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_VM_PAGE_FLUSH0xc001011e
 #define MSR_AMD64_SEV_ES_GHCB  0xc0010130
 #define MSR_AMD64_SEV  0xc0010131
 #define MSR_AMD64_SEV_ENABLED_BIT  0
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 866c9a9bcdee..236924930bf0 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -44,6 +44,7 @@ static const struct cpuid_bit cpuid_bits[] = {
	{ X86_FEATURE_SEV,		CPUID_EAX,  1, 0x8000001f, 0 },
	{ X86_FEATURE_SEV_ES,		CPUID_EAX,  3, 0x8000001f, 0 },
	{ X86_FEATURE_SME_COHERENT,	CPUID_EAX, 10, 0x8000001f, 0 },
+	{ X86_FEATURE_VM_PAGE_FLUSH,	CPUID_EAX,  2, 0x8000001f, 0 },
{ 0, 0, 0, 0, 0 }
 };
 
-- 
2.28.0



[PATCH v3 04/34] KVM: SVM: Add GHCB accessor functions for retrieving fields

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

Update the GHCB accessor functions to add functions for retrieving GHCB
fields by name. Update existing code to use the new accessor functions.
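
A short usage sketch of the new getters (the vmware.c hunk below uses the
plain ghcb_get_*() form; the *_if_valid() form additionally consults the
GHCB valid bitmap):

	/* Read RCX from the GHCB, or 0 if the guest did not mark it valid */
	u64 rcx = ghcb_get_rcx_if_valid(ghcb);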

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/svm.h   | 10 ++
 arch/x86/kernel/cpu/vmware.c | 12 ++--
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 71d630bb5e08..1edf24f51b53 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -379,6 +379,16 @@ struct vmcb {
		(unsigned long *)&ghcb->save.valid_bitmap);		\
	}								\
									\
+	static inline u64 ghcb_get_##field(struct ghcb *ghcb)			\
+	{								\
+		return ghcb->save.field;					\
+	}								\
+									\
+	static inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb)	\
+	{								\
+		return ghcb_##field##_is_valid(ghcb) ? ghcb->save.field : 0;	\
+	}								\
+									\
	static inline void ghcb_set_##field(struct ghcb *ghcb, u64 value)	\
	{								\
		__set_bit(GHCB_BITMAP_IDX(field),			\
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 924571fe5864..c6ede3b3d302 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -501,12 +501,12 @@ static bool vmware_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
  ghcb_rbp_is_valid(ghcb)))
return false;
 
-   regs->bx = ghcb->save.rbx;
-   regs->cx = ghcb->save.rcx;
-   regs->dx = ghcb->save.rdx;
-   regs->si = ghcb->save.rsi;
-   regs->di = ghcb->save.rdi;
-   regs->bp = ghcb->save.rbp;
+   regs->bx = ghcb_get_rbx(ghcb);
+   regs->cx = ghcb_get_rcx(ghcb);
+   regs->dx = ghcb_get_rdx(ghcb);
+   regs->si = ghcb_get_rsi(ghcb);
+   regs->di = ghcb_get_rdi(ghcb);
+   regs->bp = ghcb_get_rbp(ghcb);
 
return true;
 }
-- 
2.28.0



[PATCH v3 02/34] KVM: SVM: Remove the call to sev_platform_status() during setup

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

When both KVM support and the CCP driver are built into the kernel instead
of as modules, KVM initialization can happen before CCP initialization. As
a result, sev_platform_status() will return a failure when it is called
from sev_hardware_setup(), when this isn't really an error condition.

Since sev_platform_status() doesn't need to be called at this time anyway,
remove the invocation from sev_hardware_setup().

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c | 22 +-
 1 file changed, 1 insertion(+), 21 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index c0b14106258a..a4ba5476bf42 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1127,9 +1127,6 @@ void sev_vm_destroy(struct kvm *kvm)
 
 int __init sev_hardware_setup(void)
 {
-   struct sev_user_data_status *status;
-   int rc;
-
/* Maximum number of encrypted guests supported simultaneously */
max_sev_asid = cpuid_ecx(0x8000001F);
 
@@ -1148,26 +1145,9 @@ int __init sev_hardware_setup(void)
if (!sev_reclaim_asid_bitmap)
return 1;
 
-   status = kmalloc(sizeof(*status), GFP_KERNEL);
-   if (!status)
-   return 1;
-
-   /*
-* Check SEV platform status.
-*
-* PLATFORM_STATUS can be called in any state, if we failed to query
-* the PLATFORM status then either PSP firmware does not support SEV
-* feature or SEV firmware is dead.
-*/
-   rc = sev_platform_status(status, NULL);
-   if (rc)
-   goto err;
-
pr_info("SEV supported\n");
 
-err:
-   kfree(status);
-   return rc;
+   return 0;
 }
 
 void sev_hardware_teardown(void)
-- 
2.28.0



[PATCH v3 00/34] SEV-ES hypervisor support

2020-11-09 Thread Tom Lendacky
From: Tom Lendacky 

This patch series provides support for running SEV-ES guests under KVM.

Secure Encrypted Virtualization - Encrypted State (SEV-ES) expands on the
SEV support to protect the guest register state from the hypervisor. See
"AMD64 Architecture Programmer's Manual Volume 2: System Programming",
section "15.35 Encrypted State (SEV-ES)" [1].

In order to allow a hypervisor to perform functions on behalf of a guest,
there is architectural support for notifying a guest's operating system
when certain types of VMEXITs are about to occur. This allows the guest to
selectively share information with the hypervisor to satisfy the requested
function. The notification is performed using a new exception, the VMM
Communication exception (#VC). The information is shared through the
Guest-Hypervisor Communication Block (GHCB) using the VMGEXIT instruction.
The GHCB format and the protocol for using it is documented in "SEV-ES
Guest-Hypervisor Communication Block Standardization" [2].

Under SEV-ES, a vCPU save area (VMSA) must be encrypted. SVM is updated to
build the initial VMSA and then encrypt it before running the guest. Once
encrypted, it must not be modified by the hypervisor. Modification of the
VMSA will result in the VMRUN instruction failing with a SHUTDOWN exit
code. KVM must support the VMGEXIT exit code in order to perform the
necessary functions required of the guest. The GHCB is used to exchange
the information needed by both the hypervisor and the guest.

Register data from the GHCB is copied into the KVM register variables and
accessed as usual during handling of the exit. Upon return to the guest,
updated registers are copied back to the GHCB for the guest to act upon.
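
As a rough sketch of that flow (the helper names here are illustrative,
not necessarily the exact functions used by the series):

	/* On VMGEXIT: pull guest-supplied state out of the GHCB ... */
	sev_es_sync_from_ghcb(svm);		/* GHCB -> vcpu->arch.regs */

	/* ... handle the exit with the existing SVM intercept handlers ... */
	ret = svm_invoke_exit_handler(svm, exit_code);

	/* ... and before VMRUN, write any updated registers back */
	sev_es_sync_to_ghcb(svm);		/* dirty vcpu->arch.regs -> GHCB */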

There are changes to some of the intercepts that are needed under SEV-ES.
For example, CR0 writes cannot be intercepted, so the code needs to ensure
that the intercept is not enabled during execution or that the hypervisor
does not try to read the register as part of exit processing. Another
example is shutdown processing, where the vCPU cannot be directly reset.

Support is added to handle VMGEXIT events and implement the GHCB protocol.
This ranges from standard exit events, like a CPUID instruction
intercept, to new support for things like AP processor booting. Much of
the existing SVM intercept support can be re-used by setting the exit
code information from the VMGEXIT and calling the appropriate intercept
handlers.

Finally, to launch and run an SEV-ES guest requires changes to the vCPU
initialization, loading and execution.

[1] https://www.amd.com/system/files/TechDocs/24593.pdf
[2] https://developer.amd.com/wp-content/resources/56421.pdf

---

These patches are based on the KVM next branch:
https://git.kernel.org/pub/scm/virt/kvm/kvm.git next

6d6a18fdde8b ("KVM: selftests: allow two iterations of dirty_log_perf_test")

A version of the tree can also be found at:
https://github.com/AMDESE/linux/tree/sev-es-v3

Changes from v2:
- Update the freeing of the VMSA page to account for the encrypted memory
  cache coherency feature as well as the VM page flush feature.
- Update the GHCB dump function with a bit more detail.
- Don't check for RAX being present as part of a string IO operation.
- Include RSI when syncing from GHCB to support KVM hypercall arguments.
- Add GHCB usage field validation check.

Changes from v1:
- Removed the VMSA indirection support:
  - On LAUNCH_UPDATE_VMSA, sync traditional VMSA over to the new SEV-ES
VMSA area to be encrypted.
  - On VMGEXIT VMEXIT, directly copy valid registers into vCPU arch
register array from GHCB. On VMRUN (following a VMGEXIT), directly
copy dirty vCPU arch registers to GHCB.
  - Removed reg_read_override()/reg_write_override() KVM ops.
- Added VMGEXIT exit-reason validation.
- Changed kvm_vcpu_arch variable vmsa_encrypted to guest_state_protected
- Updated the tracking support for EFER/CR0/CR4/CR8 to minimize changes
  to the x86.c code
- Updated __set_sregs to not set any register values (previously supported
  setting the tracked values of EFER/CR0/CR4/CR8)
- Added support for reporting SMM capability at the VM-level. This allows
  an SEV-ES guest to indicate SMM is not supported
- Updated FPU support to check for a guest FPU save area before using it.
  Updated SVM to free guest FPU for an SEV-ES guest during KVM create_vcpu
  op.
- Removed changes to the kvm_skip_emulated_instruction()
- Added VMSA validity checks before invoking LAUNCH_UPDATE_VMSA
- Minor code restructuring in areas for better readability

Cc: Paolo Bonzini 
Cc: Jim Mattson 
Cc: Joerg Roedel 
Cc: Sean Christopherson 
Cc: Vitaly Kuznetsov 
Cc: Wanpeng Li 
Cc: Borislav Petkov 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Brijesh Singh 

Tom Lendacky (34):
  x86/cpu: Add VM page flush MSR availability as a CPUID feature
  KVM: SVM: Remove the call to sev_platform_status() during setup
  KVM: SVM: Add support for SEV-ES ca

Re: [PATCH] x86/mm/sme: Fix definition of PMD_FLAGS_DEC_WP

2020-11-09 Thread Tom Lendacky
On 11/9/20 11:35 AM, Arvind Sankar wrote:
> The PAT bit is in different locations for 4k and 2M/1G page table
> entries.
> 
> Add a definition for _PAGE_LARGE_CACHE_MASK to represent the three
> caching bits (PWT, PCD, PAT), similar to _PAGE_CACHE_MASK for 4k pages,
> and use it in the definition of PMD_FLAGS_DEC_WP to get the correct PAT
> index for write-protected pages.
> 
> Remove a duplication definition of _PAGE_PAT_LARGE.
> 
> Signed-off-by: Arvind Sankar 

Fixes: tag?

Tested-by: Tom Lendacky 

> ---
>  arch/x86/include/asm/pgtable_types.h | 3 +--
>  arch/x86/mm/mem_encrypt_identity.c   | 4 ++--
>  2 files changed, 3 insertions(+), 4 deletions(-)
> 


Re: Definition of PMD_FLAGS_DEC_WP in arch/x86/mm/mem_encrypt_identity.c

2020-11-09 Thread Tom Lendacky
On 11/8/20 10:37 AM, Arvind Sankar wrote:
> Hi, I have a question about this definition in
> arch/x86/mm/mem_encrypt_identity.c:
> 
>   #define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & 
> ~_PAGE_GLOBAL)
> 
>   #define PMD_FLAGS_DEC   PMD_FLAGS_LARGE
>   #define PMD_FLAGS_DEC_WP((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
>(_PAGE_PAT | _PAGE_PWT))
> 
> _PAGE_CACHE_MASK and _PAGE_PAT are for 4k pages, not 2M pages. The
> definition of PMD_FLAGS_DEC_WP clears the PSE bit by masking out
> _PAGE_CACHE_MASK, and sets it again by setting _PAGE_PAT, resulting in
> PMD_FLAGS_DEC_WP actually being write-through, not write-protected,
> using PAT index 1.
> 
> Shouldn't the definition be
> 
>   #define PMD_FLAGS_DEC_WP(PMD_FLAGS_DEC | _PAGE_PAT_LARGE | 
> _PAGE_PWT)
> 
> for write-protected using PAT index 5?

Yes it should. There should probably be a _PAGE_CACHE_MASK_LARGE
definition so that the end result is:

#define PMD_FLAGS_DEC_WP((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK_LARGE) | \
(_PAGE_PAT_LARGE | _PAGE_PWT)

> 
> I guess the difference doesn't actually matter for encrypt-in-place? But
> mem_encrypt_boot.S takes pains to initialize PA5 to be write-protected,
> and it looks like it won't actually be used.

Given how early in the boot everything occurs and the cache flushing that
is performed related to the operations, it works. But this should be
fixed.

Are you planning on sending a patch?

Thanks,
Tom

> 
> Thanks.
> 


Re: [PATCH v2] x86/speculation: Allow IBPB to be conditionally enabled on CPUs with always-on STIBP

2020-11-05 Thread Tom Lendacky

On 11/4/20 11:33 PM, Anand K Mistry wrote:

On AMD CPUs which have the feature X86_FEATURE_AMD_STIBP_ALWAYS_ON,
STIBP is set to on and 'spectre_v2_user_stibp ==
SPECTRE_V2_USER_STRICT_PREFERRED'. At the same time, IBPB can be set to
conditional. However, this leads to the case where it's impossible to
turn on IBPB for a process because in the PR_SPEC_DISABLE case in
ib_prctl_set, the (spectre_v2_user_stibp ==
SPECTRE_V2_USER_STRICT_PREFERRED) condition leads to a return before the
task flag is set. Similarly, ib_prctl_get will return PR_SPEC_DISABLE
even though IBPB is set to conditional.

More generally, the following cases are possible:
1. STIBP = conditional && IBPB = on for spectre_v2_user=seccomp,ibpb
2. STIBP = on && IBPB = conditional for AMD CPUs with
X86_FEATURE_AMD_STIBP_ALWAYS_ON

The first case functions correctly today, but only because
spectre_v2_user_ibpb isn't updated to reflect the IBPB mode.

At a high level, this change does one thing. If either STIBP or IBPB is
set to conditional, allow the prctl to change the task flag. Also,
reflect that capability when querying the state. This isn't perfect
since it doesn't take into account if only STIBP or IBPB is
unconditionally on. But it allows the conditional feature to work as
expected, without affecting the unconditional one.

Signed-off-by: Anand K Mistry 


Does it need a Fixes: tag?

Acked-by: Tom Lendacky 



---

Changes in v2:
- Fix typo in commit message
- s/is_spec_ib_user/is_spec_ib_user_controlled
- Update comment in ib_prctl_set() to reference X86_FEATURE_AMD_STIBP_ALWAYS_ON
- Have is_spec_ib_user_controlled() check both IBPB and STIBP modes

  arch/x86/kernel/cpu/bugs.c | 46 +++---
  1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d3f0db463f96..534225afe832 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1254,6 +1254,14 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
return 0;
  }
  
+static bool is_spec_ib_user_controlled(void)

+{
+   return spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL ||
+   spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
+   spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL ||
+   spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP;
+}
+
  static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
  {
switch (ctrl) {
@@ -1262,13 +1270,20 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return 0;
/*
-* Indirect branch speculation is always disabled in strict
-* mode. It can neither be enabled if it was force-disabled
-* by a  previous prctl call.
+* With strict mode for both IBPB and STIBP, the instruction
+* code paths avoid checking this task flag and instead,
+* unconditionally run the instruction. However, STIBP and IBPB
+* are independent and either can be set to conditionally
+* enabled regardless of the mode of the other. If either is set
+* to conditional, allow the task flag to be updated, unless it
+* was force-disabled by a previous prctl call.
+* Currently, this is possible on an AMD CPU which has the
+* feature X86_FEATURE_AMD_STIBP_ALWAYS_ON. In this case, if the
+* kernel is booted with 'spectre_v2_user=seccomp', then
+* spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP and
+* spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED.
 */
-   if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
-   spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
-   spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ||
+   if (!is_spec_ib_user_controlled() ||
task_spec_ib_force_disable(task))
return -EPERM;
task_clear_spec_ib_disable(task);
@@ -1283,9 +1298,7 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return -EPERM;
-   if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
-   spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
-   spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
+   if (!is_spec_ib_user_controlled())
return 0;
task_set_spec_ib_disable(task);
if (ctrl == PR_SPEC_FORCE_DISABLE)
@@ -1351

Re: [PATCH 1/1] x86/speculation: Allow IBPB to be conditionally enabled on CPUs with always-on STIBP

2020-10-31 Thread Tom Lendacky

On 10/29/20 1:51 AM, Anand K Mistry wrote:

On AMD CPUs which have the feature X86_FEATURE_AMD_STIBP_ALWAYS_ON,
STIBP is set to on and 'spectre_v2_user_stibp ==
SPECTRE_V2_USER_STRICT_PREFERRED'. At the same time, IBPB can be set to
conditional. However, this leads to the case where it's impossible to
turn on IBPB for a process because in the PR_SPEC_DISABLE case in
ib_prctl_set, the (spectre_v2_user_stibp ==
SPECTRE_V2_USER_STRICT_PREFERRED) condition leads to a return before the
task flag is set. Similarly, ib_prctl_get will return PR_SPEC_DISABLE
even though IBPB is set to conditional.

More generally, the following cases are possible:
1. STIBP = conditional && IBPB = on for spectre_v2_user=seccomp,ibpb
2. STIBP = on && IBPB = conditional for AMD CPUs with
X86_FEATURE_AMD_STIBP_ALWAYS_ON

The first case functions correctly today, but only because
spectre_v2_user_ibpb isn't updated to reflect the IBPB mode.

At a high level, this change does one thing. If either STIBP is IBPB is


s/STIBP is IBPB/STIBP or IBPB/


set to conditional, allow the prctl to change the task flag. Also,
reflect that capability when querying the state. This isn't perfect
since it doesn't take into account if only STIBP or IBPB is
unconditionally on. But it allows the conditional feature to work as
expected, without affecting the unconditional one.

Signed-off-by: Anand K Mistry 

---

  arch/x86/kernel/cpu/bugs.c | 41 +-
  1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d3f0db463f96..fb64e02eed6f 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1254,6 +1254,11 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
return 0;
  }
  
+static bool is_spec_ib_user(enum spectre_v2_user_mitigation mode)


Maybe something like is_spec_ib_user_controlled() would be a better name.


+{
+   return mode == SPECTRE_V2_USER_PRCTL || mode == SPECTRE_V2_USER_SECCOMP;
+}
+


I like the idea of passing in the mode you want to check, but it appears 
they are never used independently. The ibpb and stibp modes are always 
checked together in one of the if statements below, so you could make this 
a function that checks both modes and just have a single call. I'll leave 
that up to the maintainers to see what is preferred.



  static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
  {
switch (ctrl) {
@@ -1262,13 +1267,16 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return 0;
/*
-* Indirect branch speculation is always disabled in strict
-* mode. It can neither be enabled if it was force-disabled
-* by a  previous prctl call.
+* With strict mode for both IBPB and STIBP, the instruction
+* code paths avoid checking this task flag and instead,
+* unconditionally run the instruction. However, STIBP and IBPB
+* are independent and either can be set to conditionally
+* enabled regardless of the mode of the other. If either is set
+* to conditional, allow the task flag to be updated, unless it
+* was force-disabled by a previous prctl call.


You probably want to reference the STIBP always on mode that allows this 
situation.



 */
-   if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
-   spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
-   spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ||
+   if ((!is_spec_ib_user(spectre_v2_user_ibpb) &&
+!is_spec_ib_user(spectre_v2_user_stibp)) ||
task_spec_ib_force_disable(task))
return -EPERM;
task_clear_spec_ib_disable(task);
@@ -1283,9 +1291,8 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return -EPERM;
-   if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
-   spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
-   spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
+   if (!is_spec_ib_user(spectre_v2_user_ibpb) &&
+   !is_spec_ib_user(spectre_v2_user_stibp))


The set function seems reasonable to me.


return 0;
task_set_spec_ib_disable(task);
if (ctrl == PR_SPEC_FORCE_DISABLE)
@@ -1351,20 +1358,18 @@ static int ib_prctl_get(struct task_struct *task)
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
spectre_v2_user_stibp

Re: [PATCH 0/1] x86/speculation: Allow IBPB to be conditionally enabled on CPUs with always-on STIBP

2020-10-31 Thread Tom Lendacky

On 10/29/20 1:51 AM, Anand K Mistry wrote:

When attempting to do some performance testing of IBPB on and AMD
platform, I noticed the IBPB instruction was never being issued, even
though it was conditionally on and various seccomp protected processes
were force enabling it. Turns out, on those AMD CPUs, STIBP is set to
always-on and this was causing an early-out on the prctl() which turns
off IB speculation. Here is my attempt to fix it.

I'm hoping someone that understands this better than me can explain why
I'm wrong.


It all looks reasonable to me (some comments in the patch to follow). The 
thing that makes this tough is the command line option of being able to 
force IBPB using the "prctl,ibpb" or "seccomp,ibpb" while STIBP is prctl 
or seccomp controlled. There's an inherent quality that is assumed that if 
STIBP is forced then IBPB must be forced and it looks like 21998a351512 
("x86/speculation: Avoid force-disabling IBPB based on STIBP and enhanced 
IBRS.") used that. However, with the STIBP always on support, that doesn't 
hold true.


Thanks,
Tom




Anand K Mistry (1):
   x86/speculation: Allow IBPB to be conditionally enabled on CPUs with
 always-on STIBP

  arch/x86/kernel/cpu/bugs.c | 41 +-
  1 file changed, 23 insertions(+), 18 deletions(-)



Re: [PATCH v4 4/5] x86/head/64: Check SEV encryption before switching to kernel page-table

2020-10-28 Thread Tom Lendacky
On 10/28/20 11:46 AM, Joerg Roedel wrote:
> From: Joerg Roedel 
> 
> When SEV is enabled the kernel requests the C-Bit position again from
> the hypervisor to built its own page-table. Since the hypervisor is an

s/built/build/

> untrusted source the C-bit position needs to be verified before the
> kernel page-table is used.
> 
> Call the sev_verify_cbit() function before writing the CR3.
> 
> Signed-off-by: Joerg Roedel 

Reviewed-by: Tom Lendacky 

> ---
>  arch/x86/kernel/head_64.S | 16 
>  arch/x86/mm/mem_encrypt.c |  1 +
>  2 files changed, 17 insertions(+)
> 
> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index 7eb2a1c87969..3c417734790f 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -161,6 +161,21 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, 
> SYM_L_GLOBAL)
>  
>   /* Setup early boot stage 4-/5-level pagetables. */
>   addqphys_base(%rip), %rax
> +
> + /*
> +  * For SEV guests: Verify that the C-bit is correct. A malicious
> +  * hypervisor could lie about the C-bit position to perform a ROP
> +  * attack on the guest by writing to the unencrypted stack and wait for
> +  * the next RET instruction.
> +  * %rsi carries pointer to realmode data and is callee-clobbered. Save
> +  * and restore it.
> +  */
> + pushq   %rsi
> + movq%rax, %rdi
> + callsev_verify_cbit
> + popq%rsi
> +
> + /* Switch to new page-table */
>   movq%rax, %cr3
>  
>   /* Ensure I am executing from virtual addresses */
> @@ -279,6 +294,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, 
> SYM_L_GLOBAL)
>  SYM_CODE_END(secondary_startup_64)
>  
>  #include "verify_cpu.S"
> +#include "sev_verify_cbit.S"
>  
>  #ifdef CONFIG_HOTPLUG_CPU
>  /*
> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> index efbb3de472df..bc0833713be9 100644
> --- a/arch/x86/mm/mem_encrypt.c
> +++ b/arch/x86/mm/mem_encrypt.c
> @@ -39,6 +39,7 @@
>   */
>  u64 sme_me_mask __section(".data") = 0;
>  u64 sev_status __section(".data") = 0;
> +u64 sev_check_data __section(".data") = 0;
>  EXPORT_SYMBOL(sme_me_mask);
>  DEFINE_STATIC_KEY_FALSE(sev_enable_key);
>  EXPORT_SYMBOL_GPL(sev_enable_key);
> 


Re: [PATCH v4 5/5] x86/sev-es: Do not support MMIO to/from encrypted memory

2020-10-28 Thread Tom Lendacky
On 10/28/20 11:46 AM, Joerg Roedel wrote:
> From: Joerg Roedel 
> 
> MMIO memory is usually not mapped encrypted, so there is no reason to
> support emulated MMIO when it is mapped encrypted.
> 
> Prevent a possible hypervisor attack where a RAM page is mapped as
> an MMIO page in the nested page-table, so that any guest access to it
> will trigger a #VC exception and leak the data on that page to the
> hypervisor via the GHCB (like with valid MMIO). On the read side this
> attack would allow the HV to inject data into the guest.
> 
> Signed-off-by: Joerg Roedel 

Reviewed-by: Tom Lendacky 

> ---
>  arch/x86/kernel/sev-es.c | 20 +---
>  1 file changed, 13 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
> index 4a96726fbaf8..0bd1a0fc587e 100644
> --- a/arch/x86/kernel/sev-es.c
> +++ b/arch/x86/kernel/sev-es.c
> @@ -374,8 +374,8 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
>   return ES_EXCEPTION;
>  }
>  
> -static bool vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
> -  unsigned long vaddr, phys_addr_t *paddr)
> +static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct 
> es_em_ctxt *ctxt,
> +unsigned long vaddr, phys_addr_t 
> *paddr)
>  {
>   unsigned long va = (unsigned long)vaddr;
>   unsigned int level;
> @@ -394,15 +394,19 @@ static bool vc_slow_virt_to_phys(struct ghcb *ghcb, 
> struct es_em_ctxt *ctxt,
>   if (user_mode(ctxt->regs))
>   ctxt->fi.error_code |= X86_PF_USER;
>  
> - return false;
> + return ES_EXCEPTION;
>   }
>  
> + if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
> + /* Emulated MMIO to/from encrypted memory not supported */
> + return ES_UNSUPPORTED;
> +
>   pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
>   pa |= va & ~page_level_mask(level);
>  
>   *paddr = pa;
>  
> - return true;
> + return ES_OK;
>  }
>  
>  /* Include code shared with pre-decompression boot stage */
> @@ -731,6 +735,7 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, 
> struct es_em_ctxt *ctxt,
>  {
>   u64 exit_code, exit_info_1, exit_info_2;
>   unsigned long ghcb_pa = __pa(ghcb);
> + enum es_result res;
>   phys_addr_t paddr;
>   void __user *ref;
>  
> @@ -740,11 +745,12 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, 
> struct es_em_ctxt *ctxt,
>  
>   exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;
>  
> - if (!vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr)) {
> - if (!read)
> + res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
> + if (res != ES_OK) {
> + if (res == ES_EXCEPTION && !read)
>   ctxt->fi.error_code |= X86_PF_WRITE;
>  
> - return ES_EXCEPTION;
> + return res;
>   }
>  
>   exit_info_1 = paddr;
> 


Re: [PATCH v4 3/5] x86/boot/compressed/64: Check SEV encryption in 64-bit boot-path

2020-10-28 Thread Tom Lendacky
On 10/28/20 11:46 AM, Joerg Roedel wrote:
> From: Joerg Roedel 
> 
> Check whether the hypervisor reported the correct C-bit when running as
> an SEV guest. Using a wrong C-bit position could be used to leak
> sensitive data from the guest to the hypervisor.
> 
> The check function is in arch/x86/kernel/sev_verify_cbit.S so that it
> can be re-used in the running kernel image.
> 
> Signed-off-by: Joerg Roedel 

Just one minor comment below, otherwise:

Reviewed-by: Tom Lendacky 

> ---
>  arch/x86/boot/compressed/ident_map_64.c |  1 +
>  arch/x86/boot/compressed/mem_encrypt.S  |  4 ++
>  arch/x86/boot/compressed/misc.h |  2 +
>  arch/x86/kernel/sev_verify_cbit.S   | 90 +
>  4 files changed, 97 insertions(+)
>  create mode 100644 arch/x86/kernel/sev_verify_cbit.S
> 
> diff --git a/arch/x86/boot/compressed/ident_map_64.c 
> b/arch/x86/boot/compressed/ident_map_64.c
> index a5e5db6ada3c..39b2eded7bc2 100644
> --- a/arch/x86/boot/compressed/ident_map_64.c
> +++ b/arch/x86/boot/compressed/ident_map_64.c
> @@ -164,6 +164,7 @@ void initialize_identity_maps(void *rmode)
>   add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);
>  
>   /* Load the new page-table. */
> + sev_verify_cbit(top_level_pgt);
>   write_cr3(top_level_pgt);
>  }
>  
> diff --git a/arch/x86/boot/compressed/mem_encrypt.S 
> b/arch/x86/boot/compressed/mem_encrypt.S
> index 0bae1ca658d9..3275dbab085d 100644
> --- a/arch/x86/boot/compressed/mem_encrypt.S
> +++ b/arch/x86/boot/compressed/mem_encrypt.S
> @@ -68,6 +68,9 @@ SYM_FUNC_START(get_sev_encryption_bit)
>  SYM_FUNC_END(get_sev_encryption_bit)
>  
>   .code64
> +
> +#include "../../kernel/sev_verify_cbit.S"
> +
>  SYM_FUNC_START(set_sev_encryption_mask)
>  #ifdef CONFIG_AMD_MEM_ENCRYPT
>   push%rbp
> @@ -111,4 +114,5 @@ SYM_FUNC_END(set_sev_encryption_mask)
>   .balign 8
>  SYM_DATA(sme_me_mask,.quad 0)
>  SYM_DATA(sev_status, .quad 0)
> +SYM_DATA(sev_check_data, .quad 0)
>  #endif
> diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
> index 6d31f1b4c4d1..d9a631c5973c 100644
> --- a/arch/x86/boot/compressed/misc.h
> +++ b/arch/x86/boot/compressed/misc.h
> @@ -159,4 +159,6 @@ void boot_page_fault(void);
>  void boot_stage1_vc(void);
>  void boot_stage2_vc(void);
>  
> +unsigned long sev_verify_cbit(unsigned long cr3);
> +
>  #endif /* BOOT_COMPRESSED_MISC_H */
> diff --git a/arch/x86/kernel/sev_verify_cbit.S 
> b/arch/x86/kernel/sev_verify_cbit.S
> new file mode 100644
> index ..b96f0573f8af
> --- /dev/null
> +++ b/arch/x86/kernel/sev_verify_cbit.S
> @@ -0,0 +1,90 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + *   sev_verify_cbit.S - Code for verification of the C-bit position reported
> + *   by the Hypervisor when running with SEV enabled.
> + *
> + *   Copyright (c) 2020  Joerg Roedel (jroe...@suse.de)
> + *
> + * Implements sev_verify_cbit() which is called before switching to a new
> + * long-mode page-table at boot.
> + *
> + * It verifies that the C-bit position is correct by writing a random value 
> to
> + * an encrypted memory location while on the current page-table. Then it
> + * switches to the new page-table to verify the memory content is still the
> + * same. After that it switches back to the current page-table and when the
> + * check succeeded it returns. If the check failed the code invalidates the
> + * stack pointer and goes into a hlt loop. The stack-pointer is invalidated 
> to
> + * make sure no interrupt or exception can get the CPU out of the hlt loop.
> + *
> + * New page-table pointer is expected in %rdi (first parameter)
> + *
> + */
> +SYM_FUNC_START(sev_verify_cbit)
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> + /* First check if a C-bit was detected */
> + movqsme_me_mask(%rip), %rsi
> + testq   %rsi, %rsi
> + jz  3f
> +
> + /* sme_me_mask != 0 could mean SME or SEV - Check also for SEV */
> + movqsev_status(%rip), %rsi
> + testq   %rsi, %rsi
> + jz  3f
> +
> + /* Save CR4 in %rsi */
> + movq%cr4, %rsi
> +
> + /* Disable Global Pages */
> + movq%rsi, %rdx
> + andq$(~X86_CR4_PGE), %rdx
> + movq%rdx, %cr4
> +
> + /*
> +  * Verified that running under SEV - now get a random value using
> +  * RDRAND. This instruction is mandatory when running as an SEV guest.
> +  *
> +  * Don't bail out of the loop if RDRAND returns errors. It is better to
> +  * prevent forward progress than to work with a non-random value here.
&g

Re: [PATCH v4 2/5] x86/boot/compressed/64: Add CPUID sanity check to early #VC handler

2020-10-28 Thread Tom Lendacky
On 10/28/20 11:46 AM, Joerg Roedel wrote:
> From: Joerg Roedel 
> 
> The early #VC handler which doesn't have a GHCB can only handle CPUID
> exit codes. It is needed by the early boot code to handle #VC
> exceptions raised in verify_cpu() and to get the position of the C
> bit.
> 
> But the CPUID information comes from the hypervisor, which is untrusted
> and might return results which trick the guest into the no-SEV boot path
> with no C bit set in the page-tables. All data written to memory would
> then be unencrypted and could leak sensitive data to the hypervisor.
> 
> Add sanity checks to the early #VC handlers to make sure the hypervisor
> can not pretend that SEV is disabled.
> 
> Signed-off-by: Joerg Roedel 

Reviewed-by: Tom Lendacky 

> ---
>  arch/x86/kernel/sev-es-shared.c | 26 ++
>  1 file changed, 26 insertions(+)
> 
> diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c
> index 5f83ccaab877..56d16c405b03 100644
> --- a/arch/x86/kernel/sev-es-shared.c
> +++ b/arch/x86/kernel/sev-es-shared.c
> @@ -178,6 +178,32 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned 
> long exit_code)
>   goto fail;
>   regs->dx = val >> 32;
>  
> + /*
> +  * This is a VC handler and the #VC is only raised when SEV-ES is
> +  * active, which means SEV must be active too. Do sanity checks on the
> +  * CPUID results to make sure the hypervisor does not trick the kernel
> +  * into the no-sev path. This could map sensitive data unencrypted and
> +  * make it accessible to the hypervisor.
> +  *
> +  * In particular, check for:
> +  *  - Hypervisor CPUID bit
> +  *  - Availability of CPUID leaf 0x8000001f
> +  *  - SEV CPUID bit.
> +  *
> +  * The hypervisor might still report the wrong C-bit position, but this
> +  * can't be checked here.
> +  */
> +
> +	if ((fn == 1) && !(regs->cx & BIT(31)))
> +		/* Hypervisor bit */
> +		goto fail;
> +	else if ((fn == 0x80000000) && (regs->ax < 0x8000001f))
> +		/* SEV Leaf check */
> +		goto fail;
> +	else if ((fn == 0x8000001f) && !(regs->ax & BIT(1)))
> +		/* SEV Bit */
> +		goto fail;
> +
>   /* Skip over the CPUID two-byte opcode */
>   regs->ip += 2;
>  
> 


Re: [PATCH v4 1/5] x86/boot/compressed/64: Introduce sev_status

2020-10-28 Thread Tom Lendacky
On 10/28/20 11:46 AM, Joerg Roedel wrote:
> From: Joerg Roedel 
> 
> Introduce sev_status and initialize it together with sme_me_mask to have
> an indicator which SEV features are enabled.
> 
> Signed-off-by: Joerg Roedel 

Reviewed-by: Tom Lendacky 

> ---
>  arch/x86/boot/compressed/mem_encrypt.S | 16 +++-
>  1 file changed, 15 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/boot/compressed/mem_encrypt.S 
> b/arch/x86/boot/compressed/mem_encrypt.S
> index dd07e7b41b11..0bae1ca658d9 100644
> --- a/arch/x86/boot/compressed/mem_encrypt.S
> +++ b/arch/x86/boot/compressed/mem_encrypt.S
> @@ -81,6 +81,19 @@ SYM_FUNC_START(set_sev_encryption_mask)
>  
>   bts %rax, sme_me_mask(%rip) /* Create the encryption mask */
>  
> + /*
> +  * Read MSR_AMD64_SEV again and store it to sev_status. Can't do this in
> +  * get_sev_encryption_bit() because this function is 32 bit code and
> +  * shared between 64 bit and 32 bit boot path.
> +  */
> + movl    $MSR_AMD64_SEV, %ecx    /* Read the SEV MSR */
> + rdmsr
> +
> + /* Store MSR value in sev_status */
> + shlq    $32, %rdx
> + orq     %rdx, %rax
> + movq    %rax, sev_status(%rip)
> +
>  .Lno_sev_mask:
>   movq    %rbp, %rsp  /* Restore original stack pointer */
>  
> @@ -96,5 +109,6 @@ SYM_FUNC_END(set_sev_encryption_mask)
>  
>  #ifdef CONFIG_AMD_MEM_ENCRYPT
>   .balign 8
> -SYM_DATA(sme_me_mask, .quad 0)
> +SYM_DATA(sme_me_mask,   .quad 0)
> +SYM_DATA(sev_status,    .quad 0)
>  #endif
> 


Re: [RFCv2 15/16] KVM: Unmap protected pages from direct mapping

2020-10-26 Thread Tom Lendacky
On 10/20/20 7:18 AM, David Hildenbrand wrote:
> On 20.10.20 08:18, Kirill A. Shutemov wrote:
>> If the protected memory feature enabled, unmap guest memory from
>> kernel's direct mappings.
> 
> Gah, ugly. I guess this also defeats compaction, swapping, ... oh gosh.
> As if all of the encrypted VM implementations didn't bring us enough
> ugliness already (SEV extensions also don't support reboots, but can at
> least kexec() IIRC).

SEV does support reboot. SEV-ES using Qemu doesn't support reboot because
of the way Qemu resets the vCPU state. If Qemu could relaunch the guest
through the SEV APIs to reset the vCPU state, then a "reboot" would be
possible.

SEV does support kexec, SEV-ES does not at the moment.

Thanks,
Tom

> 
> Something similar is done with secretmem [1]. And people don't seem to
> like fragmenting the direct mapping (including me).
> 
> [1] 
> https://lkml.kernel.org/r/20200924132904.1391-1-rppt@kernel.org
> 


Re: default cpufreq gov, was: [PATCH] sched/fair: check for idle core

2020-10-23 Thread Tom Lendacky

On 10/23/20 2:03 AM, Peter Zijlstra wrote:

On Thu, Oct 22, 2020 at 10:10:35PM +0200, Giovanni Gherdovich wrote:

* for the AMD EPYC machines we haven't yet implemented frequency invariant
   accounting, which might explain why schedutil loses to ondemand on all
   the benchmarks.


Right, I poked the AMD people on that a few times, but nothing seems to
be forthcoming :/ Tom, any way you could perhaps expedite the matter?


Adding Nathan to the thread to help out here.

Thanks,
Tom



In particular we're looking for some X86_VENDOR_AMD/HYGON code to run in

   arch/x86/kernel/smpboot.c:init_freq_invariance()

The main issue is finding a 'max' frequency that is not the absolute max
turbo boost (this could result in not reaching it very often) but also
not too low such that we're always clipping.

And while we're here, IIUC AMD is still using acpi_cpufreq, but AFAIK
the chips have a CPPC interface which could be used instead. Is there
any progress on that?
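
For reference, a minimal sketch of the kind of AMD/Hygon hook being asked for in
the quoted text above: derive the init_freq_invariance() ratio from ACPI CPPC,
with nominal_perf as the base frequency and highest_perf as the ceiling.
cppc_get_perf_caps() and the perf-cap fields are the real ACPI helpers; the
function name is made up, and whether highest_perf (the 1-core boost) is the
right "max" is exactly the open question here, so treat this as a sketch rather
than the eventual implementation.

#include <acpi/cppc_acpi.h>
#include <linux/math64.h>
#include <linux/sched/topology.h>

/* Hypothetical helper, called from init_freq_invariance() on AMD/Hygon. */
static u64 amd_freq_invariance_ratio(int cpu)
{
	struct cppc_perf_caps caps;

	if (cppc_get_perf_caps(cpu, &caps))
		return 0;

	if (!caps.highest_perf || !caps.nominal_perf)
		return 0;

	/*
	 * Ratio of the chosen "max" frequency to the guaranteed base
	 * frequency, in SCHED_CAPACITY_SCALE units. APERF/MPERF deltas get
	 * divided by this ratio, so a CPU running at that max reports full
	 * capacity. A less aggressive ceiling than highest_perf would clip
	 * less often, which is the trade-off described above.
	 */
	return div64_u64((u64)caps.highest_perf << SCHED_CAPACITY_SHIFT,
			 caps.nominal_perf);
}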



Re: AMD SME encrpytion and PCI BAR pages to user space

2020-10-21 Thread Tom Lendacky
On 10/21/20 6:59 AM, Jason Gunthorpe wrote:
> On Mon, Oct 19, 2020 at 11:36:16AM -0500, Tom Lendacky wrote:
> 
>>> io_remap_pfn_range()? Is there use cases where a caller actually wants
>>> encrypted io memory?
>>
>> As long as you never have physical memory / ram being mapped in this path,
>> it seems that applying pgprot_decrypted() would be ok.
> 
> I made a patch along these lines:
> 
> https://github.com/jgunthorpe/linux/commit/fc990842983f3530b72fcceafed84bd6075174a1
> 
> Just waiting for the 0-day bots to check it
> 
> I now have a report that SME works OK but when the same test is done
> inside a VM with SEV it fails again - is there something else needed
> for the SEV case?

Probably. I would assume that it is getting past the MMIO issue, since the
above patch should cover SEV, too. But, with SEV, all DMA to and from the
guest is unencrypted. I'm not familiar with how the DMA is setup and
performed in this situation, but if the DMA is occurring to userspace
buffers that are mapped as encrypted, then the resulting access will be
ciphertext (either reading unencrypted data from the device as encrypted
or writing encrypted data to the device that should be unencrypted). There
isn't currently an API to allow userspace to change its mapping from
encrypted to unencrypted.

> 
> This would be using VFIO with qemu and KVM to assign the PCI device to
> the guest, it seems the guest kernel driver is able to use the device
> but the guest userspace fails.

In the kernel, the SWIOTLB support is used to bounce the data from
encrypted to unencrypted and vice-versa.

Thanks,
Tom
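
To illustrate the in-kernel path described above, here is a minimal sketch of a
streaming DMA mapping under SEV; the device handle and function name are
hypothetical, only the DMA API calls are real. The driver code is unchanged -
when SEV is active the DMA layer bounces the buffer through the decrypted
SWIOTLB area and copies the data back on unmap:

#include <linux/dma-mapping.h>
#include <linux/slab.h>

static int example_receive_dma(struct device *dev, size_t len)
{
	void *buf = kmalloc(len, GFP_KERNEL);	/* normal (encrypted) guest memory */
	dma_addr_t dma;
	int ret = 0;

	if (!buf)
		return -ENOMEM;

	/*
	 * Under SEV this returns the address of a SWIOTLB bounce slot that
	 * is mapped decrypted, so device accesses see plaintext.
	 */
	dma = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
	if (dma_mapping_error(dev, dma)) {
		ret = -EIO;
		goto out;
	}

	/* ... program the device to DMA into 'dma' and wait for completion ... */

	/* Copies the bounce buffer contents back into the encrypted 'buf'. */
	dma_unmap_single(dev, dma, len, DMA_FROM_DEVICE);
out:
	kfree(buf);
	return ret;
}

A userspace buffer mapped encrypted has no equivalent bounce step, which is why
the VFIO/Qemu guest userspace case above still fails.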

> 
> Regards,
> Jason
> 


Re: AMD SME encrpytion and PCI BAR pages to user space

2020-10-19 Thread Tom Lendacky
On 10/19/20 12:00 PM, Jason Gunthorpe wrote:
> On Mon, Oct 19, 2020 at 11:36:16AM -0500, Tom Lendacky wrote:
> 
>>> Is RDMA missing something? I don't see anything special in VFIO for
>>> instance and the two are very similar - does VFIO work with SME, eg
>>> DPDK or something unrelated to virtualization?
>>
>> If user space is mapping un-encrypted memory, then, yes, it would seem
>> that there is a gap in the support where the pgprot_decrypted() would be
>> needed in order to override the protection map.
> 
> It isn't "memory" it is PCI BAR pages, eg memory mapped IO

Right, I understand that.

> 
>>> Is there a reason not to just add prot_decrypted() to
>>> io_remap_pfn_range()? Is there use cases where a caller actually wants
>>> encrypted io memory?
>>
>> As long as you never have physical memory / ram being mapped in this path,
>> it seems that applying pgprot_decrypted() would be ok.
> 
> I think the word 'io' implies this is the case..

Heh, you would think so, but I found quite a few things that used ioremap
instead of memremap when developing this.

> 
> Let me make a patch for this avenue then, I think it is not OK to add
> pgprot_decrypted to every driver.. We already have the special
> distinction with io and non-io remap, that seems better.

Yup, seems reasonable.

> 
>>> I saw your original patch series edited a few drivers this way, but
>>> not nearly enough. So I feel like I'm missing something.. Does vfio
>>> work with SME? I couldn't find any sign of it calling prot_decrypted()
>>> either?
>>
>> I haven't tested SME with VFIO/DPDK.
> 
> Hum, I assume it is broken also. Actually quite a swath of drivers
> and devices will be broken under this :\

Not sure what you mean by the last statement - in general or when running
under VFIO/DPDK? In general, traditional in kernel drivers work just fine
under SME without any changes.

Thanks,
Tom

> 
> Jason
> 


Re: AMD SME encrpytion and PCI BAR pages to user space

2020-10-19 Thread Tom Lendacky
On 10/19/20 10:25 AM, Jason Gunthorpe wrote:
> Hi Tom,

Hi Jason,

> 
> We've found a bug where systems that have the AMD SME turned on are
> not able to run RDMA work loads. It seems the kernel is automatically
> encrypting VMA's pointing at PCI BAR memory created by
> io_remap_pfn_range() - adding a prot_decrypted() causes things to
> start working.
> 
> To me this is surprising, before I go adding random prot_decrypted()
> into the RDMA subsystem can you confirm this is actually how things
> are expected to work?

Yes, currently, the idea is that anything being done in user space is
mapped encrypted.

> 
> Is RDMA missing something? I don't see anything special in VFIO for
> instance and the two are very similar - does VFIO work with SME, eg
> DPDK or something unrelated to virtualization?

If user space is mapping un-encrypted memory, then, yes, it would seem
that there is a gap in the support where the pgprot_decrypted() would be
needed in order to override the protection map.

>  
> Is there a reason not to just add prot_decrypted() to
> io_remap_pfn_range()? Is there use cases where a caller actually wants
> encrypted io memory?

As long as you never have physical memory / ram being mapped in this path,
it seems that applying pgprot_decrypted() would be ok.
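
As a concrete illustration of the per-driver alternative being weighed here
(before pushing pgprot_decrypted() into io_remap_pfn_range() itself), a minimal
sketch of a hypothetical BAR mmap handler; pgprot_decrypted() and
io_remap_pfn_range() are the real kernel helpers, everything else is made up
for the example:

#include <linux/mm.h>
#include <linux/pci.h>

static int example_mmap_bar(struct pci_dev *pdev, struct vm_area_struct *vma, int bar)
{
	unsigned long start = pci_resource_start(pdev, bar);
	unsigned long size = vma->vm_end - vma->vm_start;

	if (size > pci_resource_len(pdev, bar))
		return -EINVAL;

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	/* MMIO must not carry the SME/SEV encryption bit in the user mapping. */
	vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);

	return io_remap_pfn_range(vma, vma->vm_start, start >> PAGE_SHIFT,
				  size, vma->vm_page_prot);
}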

> 
> I saw your original patch series edited a few drivers this way, but
> not nearly enough. So I feel like I'm missing something.. Does vfio
> work with SME? I couldn't find any sign of it calling prot_decrypted()
> either?

I haven't tested SME with VFIO/DPDK.

> 
> (BTW, I don't have any AMD SME systems to test on here, I'm getting
>  this bug report from deployed system, running a distro kernel)

As a work around, if the system has support for TSME (transparent SME),
then that can be enabled (it is a BIOS option that the BIOS vendor would
have had to expose) to encrypt all of the system memory without requiring
SME support.

Thanks,
Tom

> 
> Thanks,
> Jason
> 


Re: [RFC PATCH 00/35] SEV-ES hypervisor support

2020-10-13 Thread Tom Lendacky
Apologies, Sean.

I thought I had replied to this but found it instead in my drafts folder...

I've taken much of your feedback and incorporated that into the next
version of the patches that I submitted and updated this response based on
that, too.

On 9/15/20 7:19 PM, Sean Christopherson wrote:
> On Tue, Sep 15, 2020 at 12:22:05PM -0500, Tom Lendacky wrote:
>> On 9/14/20 5:59 PM, Sean Christopherson wrote:
>>> Given that we don't yet have publicly available KVM code for TDX, what if I
>>> generate and post a list of ioctls() that are denied by either SEV-ES or 
>>> TDX,
>>> organized by the denier(s)?  Then for the ioctls() that are denied by one 
>>> and
>>> not the other, we add a brief explanation of why it's denied?
>>>
>>> If that sounds ok, I'll get the list and the TDX side of things posted
>>> tomorrow.
>>
>> That sounds good.
> 
> TDX completely blocks the following ioctl()s:

SEV-ES doesn't need to completely block these ioctls. SEV-SNP is likely to
do more of that. SEV-ES will still allow interrupts to be injected, or
registers to be retrieved (which will only contain what was provided in
the GHCB exchange), etc.

> 
>   kvm_vcpu_ioctl_interrupt
>   kvm_vcpu_ioctl_smi
>   kvm_vcpu_ioctl_x86_setup_mce
>   kvm_vcpu_ioctl_x86_set_mce
>   kvm_vcpu_ioctl_x86_get_debugregs
>   kvm_vcpu_ioctl_x86_set_debugregs
>   kvm_vcpu_ioctl_x86_get_xsave
>   kvm_vcpu_ioctl_x86_set_xsave
>   kvm_vcpu_ioctl_x86_get_xcrs
>   kvm_vcpu_ioctl_x86_set_xcrs
>   kvm_arch_vcpu_ioctl_get_regs
>   kvm_arch_vcpu_ioctl_set_regs
>   kvm_arch_vcpu_ioctl_get_sregs
>   kvm_arch_vcpu_ioctl_set_sregs
>   kvm_arch_vcpu_ioctl_set_guest_debug
>   kvm_arch_vcpu_ioctl_get_fpu
>   kvm_arch_vcpu_ioctl_set_fpu

Of the listed ioctls, really the only ones I've updated are:

  kvm_vcpu_ioctl_x86_get_xsave
  kvm_vcpu_ioctl_x86_set_xsave

  kvm_arch_vcpu_ioctl_get_sregs
This allows reading of the tracking value registers
  kvm_arch_vcpu_ioctl_set_sregs
This prevents setting of register values

  kvm_arch_vcpu_ioctl_set_guest_debug

  kvm_arch_vcpu_ioctl_get_fpu
  kvm_arch_vcpu_ioctl_set_fpu

> 
> Looking through the code, I think kvm_arch_vcpu_ioctl_get_mpstate() and
> kvm_arch_vcpu_ioctl_set_mpstate() should also be disallowed, we just haven't
> actually done so.

I haven't done anything with these either.

> 
> There are also two helper functions that are "blocked".
> dm_request_for_irq_injection() returns false if guest_state_protected, and
> post_kvm_run_save() shoves dummy state.

... and these.

> 
> TDX also selectively blocks/skips portions of other ioctl()s so that the
> TDX code itself can yell loudly if e.g. .get_cpl() is invoked.  The event
> injection restrictions are due to direct injection not being allowed (except
> for NMIs); all IRQs have to be routed through APICv (posted interrupts) and
> exception injection is completely disallowed.

For SEV-ES, we don't have those restrictions.

> 
>   kvm_vcpu_ioctl_x86_get_vcpu_events:
>   if (!vcpu->kvm->arch.guest_state_protected)
>   events->interrupt.shadow = 
> kvm_x86_ops.get_interrupt_shadow(vcpu);
> 
>   kvm_arch_vcpu_put:
> if (vcpu->preempted && !vcpu->kvm->arch.guest_state_protected)
> vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu);
> 
>   kvm_vcpu_ioctl_x86_set_vcpu_events:
>   u32 allowed_flags = KVM_VCPUEVENT_VALID_NMI_PENDING |
>   KVM_VCPUEVENT_VALID_SIPI_VECTOR |
>   KVM_VCPUEVENT_VALID_SHADOW |
>   KVM_VCPUEVENT_VALID_SMM |
>   KVM_VCPUEVENT_VALID_PAYLOAD;
> 
>   if (vcpu->kvm->arch.guest_state_protected)
>   allowed_flags = KVM_VCPUEVENT_VALID_NMI_PENDING;
> 
> 
>   kvm_arch_vcpu_ioctl_run:
>   if (vcpu->kvm->arch.guest_state_protected)
>   kvm_sync_valid_fields = KVM_SYNC_X86_EVENTS;
>   else
>   kvm_sync_valid_fields = KVM_SYNC_X86_VALID_FIELDS;
> 
> 
> In addition to the more generic guest_state_protected, we also (obviously
> tentatively) have a few other flags to deal with aspects of TDX that I'm
> fairly certain don't apply to SEV-ES:
> 
>   tsc_immutable - KVM doesn't have write access to the TSC offset of the
>   guest.
> 
>   eoi_intercept_unsupported - KVM can't intercept EOIs (doesn't have access
>   to EOI bitmaps) and so can't support level
>   triggered interrupts, at least not without
>   extra pain.
> 
>   readonly_mem_unsupported - Secure EPT (analagous to SNP) requires RWX
>  permissions for all private/encrypted memory.
>  S-EPT isn't optional, so we get the joy of
>  adding this right off the bat...

Yes, most of the above stuff doesn't apply to SEV-ES.

Thanks,
Tom

> 


[RFC PATCH v2 31/33] KVM: SVM: Provide support for SEV-ES vCPU loading

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

An SEV-ES vCPU requires additional VMCB vCPU load/put requirements. SEV-ES
hardware will restore certain registers on VMEXIT, but not save them on
VMRUN (see Table B-3 and Table B-4 of the AMD64 APM Volume 2), so make the
following changes:

General vCPU load changes:
  - During vCPU loading, perform a VMSAVE to the per-CPU SVM save area and
save the current values of XCR0, XSS and PKRU to the per-CPU SVM save
area as these registers will be restored on VMEXIT.

General vCPU put changes:
  - Do not attempt to restore registers that SEV-ES hardware has already
restored on VMEXIT.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/svm.h | 10 ---
 arch/x86/kvm/svm/sev.c | 54 ++
 arch/x86/kvm/svm/svm.c | 36 -
 arch/x86/kvm/svm/svm.h | 22 +++-
 arch/x86/kvm/x86.c |  3 ++-
 arch/x86/kvm/x86.h |  1 +
 6 files changed, 103 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index a57331de59e2..1c561945b426 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -234,7 +234,8 @@ struct vmcb_save_area {
u8 cpl;
u8 reserved_2[4];
u64 efer;
-   u8 reserved_3[112];
+   u8 reserved_3[104];
+   u64 xss;/* Valid for SEV-ES only */
u64 cr4;
u64 cr3;
u64 cr0;
@@ -265,9 +266,12 @@ struct vmcb_save_area {
 
/*
 * The following part of the save area is valid only for
-* SEV-ES guests when referenced through the GHCB.
+* SEV-ES guests when referenced through the GHCB or for
+* saving to the host save area.
 */
-   u8 reserved_7[104];
+   u8 reserved_7[80];
+   u32 pkru;
+   u8 reserved_7a[20];
u64 reserved_8; /* rax already available at 0x01f8 */
u64 rcx;
u64 rdx;
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 1798a2eefcdd..6be4f0cbf09d 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -17,12 +17,15 @@
 #include 
 
 #include 
+#include 
 
 #include "x86.h"
 #include "svm.h"
 #include "cpuid.h"
 #include "trace.h"
 
+#define __ex(x) __kvm_handle_fault_on_reboot(x)
+
 static u8 sev_enc_bit;
 static int sev_flush_asids(void);
 static DECLARE_RWSEM(sev_deactivate_lock);
@@ -1805,3 +1808,54 @@ void sev_es_create_vcpu(struct vcpu_svm *svm)
GHCB_VERSION_MIN,
sev_enc_bit));
 }
+
+void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu)
+{
+   struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+   struct vmcb_save_area *hostsa;
+   unsigned int i;
+
+   /*
+* As an SEV-ES guest, hardware will restore the host state on VMEXIT,
+* of which one step is to perform a VMLOAD. Since hardware does not
+* perform a VMSAVE on VMRUN, the host savearea must be updated.
+*/
+   asm volatile(__ex("vmsave") : : "a" (__sme_page_pa(sd->save_area)) : 
"memory");
+
+   /*
+* Certain MSRs are restored on VMEXIT, only save ones that aren't
+* restored.
+*/
+   for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) {
+   if (host_save_user_msrs[i].sev_es_restored)
+   continue;
+
+   rdmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]);
+   }
+
+   /* XCR0 is restored on VMEXIT, save the current host value */
+   hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400);
+   hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+
+   /* PKRU is restored on VMEXIT, save the current host value */
+   hostsa->pkru = read_pkru();
+
+   /* MSR_IA32_XSS is restored on VMEXIT, save the current host value */
+   hostsa->xss = host_xss;
+}
+
+void sev_es_vcpu_put(struct vcpu_svm *svm)
+{
+   unsigned int i;
+
+   /*
+* Certain MSRs are restored on VMEXIT and were saved with vmsave in
+* sev_es_vcpu_load() above. Only restore ones that weren't.
+*/
+   for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) {
+   if (host_save_user_msrs[i].sev_es_restored)
+   continue;
+
+   wrmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]);
+   }
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b3e2c993bc4c..35d6f27ef288 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1435,15 +1435,20 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int 
cpu)
vmcb_mark_all_dirty(svm->vmcb);
}
 
+   if (sev_es_guest(svm->vcpu.kvm)) {
+   sev_es_vcpu_load(svm, cpu);
+   } else {
 #ifdef CONFIG_X86_64
-   rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_

[RFC PATCH v2 32/33] KVM: SVM: Provide an updated VMRUN invocation for SEV-ES guests

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

The run sequence is different for an SEV-ES guest compared to a legacy or
even an SEV guest. The guest vCPU register state of an SEV-ES guest will
be restored on VMRUN and saved on VMEXIT. There is no need to restore the
guest registers directly and through VMLOAD before VMRUN and no need to
save the guest registers directly and through VMSAVE on VMEXIT.

Update the svm_vcpu_run() function to skip register state saving and
restoring and provide an alternative function for running an SEV-ES guest
in vmenter.S

Additionally, certain host state is restored across an SEV-ES VMRUN. As
a result certain register states are not required to be restored upon
VMEXIT (e.g. FS, GS, etc.), so only do that if the guest is not an SEV-ES
guest.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/svm.c | 25 ---
 arch/x86/kvm/svm/svm.h |  5 
 arch/x86/kvm/svm/vmenter.S | 50 ++
 arch/x86/kvm/x86.c |  6 +
 4 files changed, 77 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 35d6f27ef288..90843131cc92 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3760,16 +3760,20 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu 
*vcpu,
guest_enter_irqoff();
lockdep_hardirqs_on(CALLER_ADDR0);
 
-   __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
+   if (sev_es_guest(svm->vcpu.kvm)) {
+   __svm_sev_es_vcpu_run(svm->vmcb_pa);
+   } else {
+   __svm_vcpu_run(svm->vmcb_pa, (unsigned long 
*)&svm->vcpu.arch.regs);
 
 #ifdef CONFIG_X86_64
-   native_wrmsrl(MSR_GS_BASE, svm->host.gs_base);
+   native_wrmsrl(MSR_GS_BASE, svm->host.gs_base);
 #else
-   loadsegment(fs, svm->host.fs);
+   loadsegment(fs, svm->host.fs);
 #ifndef CONFIG_X86_32_LAZY_GS
-   loadsegment(gs, svm->host.gs);
+   loadsegment(gs, svm->host.gs);
 #endif
 #endif
+   }
 
/*
 * VMEXIT disables interrupts (host state), but tracing and lockdep
@@ -3864,14 +3868,17 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct 
kvm_vcpu *vcpu)
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
-   reload_tss(vcpu);
+   if (!sev_es_guest(svm->vcpu.kvm))
+   reload_tss(vcpu);
 
x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
 
-   vcpu->arch.cr2 = svm->vmcb->save.cr2;
-   vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
-   vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
-   vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
+   if (!sev_es_guest(svm->vcpu.kvm)) {
+   vcpu->arch.cr2 = svm->vmcb->save.cr2;
+   vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
+   vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
+   vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
+   }
 
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_interrupt(&svm->vcpu);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index acbff817559b..a826e7de1663 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -584,4 +584,9 @@ void sev_es_create_vcpu(struct vcpu_svm *svm);
 void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu);
 void sev_es_vcpu_put(struct vcpu_svm *svm);
 
+/* vmenter.S */
+
+void __svm_sev_es_vcpu_run(unsigned long vmcb_pa);
+void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
+
 #endif
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 1ec1ac40e328..6feb8c08f45a 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -168,3 +168,53 @@ SYM_FUNC_START(__svm_vcpu_run)
pop %_ASM_BP
ret
 SYM_FUNC_END(__svm_vcpu_run)
+
+/**
+ * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode
+ * @vmcb_pa:   unsigned long
+ */
+SYM_FUNC_START(__svm_sev_es_vcpu_run)
+   push %_ASM_BP
+#ifdef CONFIG_X86_64
+   push %r15
+   push %r14
+   push %r13
+   push %r12
+#else
+   push %edi
+   push %esi
+#endif
+   push %_ASM_BX
+
+   /* Enter guest mode */
+   mov %_ASM_ARG1, %_ASM_AX
+   sti
+
+1: vmrun %_ASM_AX
+   jmp 3f
+2: cmpb $0, kvm_rebooting
+   jne 3f
+   ud2
+   _ASM_EXTABLE(1b, 2b)
+
+3: cli
+
+#ifdef CONFIG_RETPOLINE
+   /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
+   FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+#endif
+
+   pop %_ASM_BX
+
+#ifdef CONFIG_X86_64
+   pop %r12
+   pop %r13
+   pop %r14
+   pop %r15
+#else
+   po

[RFC PATCH v2 33/33] KVM: SVM: Provide support to launch and run an SEV-ES guest

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

An SEV-ES guest is started by invoking a new SEV initialization ioctl,
KVM_SEV_ES_INIT. This identifies the guest as an SEV-ES guest, which is
used to drive the appropriate ASID allocation, VMSA encryption, etc.

Before being able to run an SEV-ES vCPU, the vCPU VMSA must be encrypted
and measured. This is done using the LAUNCH_UPDATE_VMSA command after all
calls to LAUNCH_UPDATE_DATA have been performed, but before LAUNCH_MEASURE
has been performed. In order to establish the encrypted VMSA, the current
(traditional) VMSA and the GPRs are synced to the page that will hold the
encrypted VMSA and then LAUNCH_UPDATE_VMSA is invoked. The vCPU is then
marked as having protected guest state.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c | 104 +
 1 file changed, 104 insertions(+)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 6be4f0cbf09d..db8ccb3270f2 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -202,6 +202,16 @@ static int sev_guest_init(struct kvm *kvm, struct 
kvm_sev_cmd *argp)
return ret;
 }
 
+static int sev_es_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+   if (!sev_es)
+   return -ENOTTY;
+
+   to_kvm_svm(kvm)->sev_info.es_active = true;
+
+   return sev_guest_init(kvm, argp);
+}
+
 static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
 {
struct sev_data_activate *data;
@@ -500,6 +510,94 @@ static int sev_launch_update_data(struct kvm *kvm, struct 
kvm_sev_cmd *argp)
return ret;
 }
 
+static int sev_es_sync_vmsa(struct vcpu_svm *svm)
+{
+   struct vmcb_save_area *save = &svm->vmcb->save;
+
+   /* Check some debug related fields before encrypting the VMSA */
+   if (svm->vcpu.guest_debug || (save->dr7 & ~DR7_FIXED_1))
+   return -EINVAL;
+
+   /* Sync registers */
+   save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
+   save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX];
+   save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+   save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX];
+   save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP];
+   save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP];
+   save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI];
+   save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI];
+   save->r8  = svm->vcpu.arch.regs[VCPU_REGS_R8];
+   save->r9  = svm->vcpu.arch.regs[VCPU_REGS_R9];
+   save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10];
+   save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11];
+   save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12];
+   save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13];
+   save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14];
+   save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15];
+   save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];
+
+   /* Sync some non-GPR registers before encrypting */
+   save->xcr0 = svm->vcpu.arch.xcr0;
+   save->pkru = svm->vcpu.arch.pkru;
+   save->xss  = svm->vcpu.arch.ia32_xss;
+
+   /*
+* SEV-ES will use a VMSA that is pointed to by the VMCB, not
+* the traditional VMSA that is part of the VMCB. Copy the
+* traditional VMSA as it has been built so far (in prep
+* for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
+*/
+   memcpy(svm->vmsa, save, sizeof(*save));
+
+   return 0;
+}
+
+static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+   struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+   struct sev_data_launch_update_vmsa *vmsa;
+   int i, ret;
+
+   if (!sev_es_guest(kvm))
+   return -ENOTTY;
+
+   vmsa = kzalloc(sizeof(*vmsa), GFP_KERNEL);
+   if (!vmsa)
+   return -ENOMEM;
+
+   for (i = 0; i < kvm->created_vcpus; i++) {
+   struct vcpu_svm *svm = to_svm(kvm->vcpus[i]);
+
+   /* Perform some pre-encryption checks against the VMSA */
+   ret = sev_es_sync_vmsa(svm);
+   if (ret)
+   goto e_free;
+
+   /*
+* The LAUNCH_UPDATE_VMSA command will perform in-place
+* encryption of the VMSA memory content (i.e it will write
+* the same memory region with the guest's key), so invalidate
+* it first.
+*/
+   clflush_cache_range(svm->vmsa, PAGE_SIZE);
+
+   vmsa->handle = sev->handle;
+   vmsa->address = __sme_pa(svm->vmsa);
+   vmsa->len = PAGE_SIZE;
+   ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, vmsa,
+   &argp->error);
+   if (ret)
+   goto e_free;
+
+   

[RFC PATCH v2 30/33] KVM: SVM: Provide support for SEV-ES vCPU creation/loading

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

An SEV-ES vCPU requires additional VMCB initialization requirements for
vCPU creation and vCPU load/put requirements. This includes:

General VMCB initialization changes:
  - Set a VMCB control bit to enable SEV-ES support on the vCPU.
  - Set the VMCB encrypted VM save area address.
  - CRx registers are part of the encrypted register state and cannot be
updated. Remove the CRx register read and write intercepts and replace
them with CRx register write traps to track the CRx register values.
  - Certain MSR values are part of the encrypted register state and cannot
be updated. Remove certain MSR intercepts (EFER, CR_PAT, etc.).
  - Remove the #GP intercept (no support for "enable_vmware_backdoor").
  - Remove the XSETBV intercept since the hypervisor cannot modify XCR0.

General vCPU creation changes:
  - Set the initial GHCB gpa value as per the GHCB specification.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/svm.h | 15 +-
 arch/x86/kvm/svm/sev.c | 56 ++
 arch/x86/kvm/svm/svm.c | 20 --
 arch/x86/kvm/svm/svm.h |  6 +++-
 4 files changed, 92 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index caa8628f5fba..a57331de59e2 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -98,6 +98,16 @@ enum {
INTERCEPT_MWAIT_COND,
INTERCEPT_XSETBV,
INTERCEPT_RDPRU,
+   TRAP_EFER_WRITE,
+   TRAP_CR0_WRITE,
+   TRAP_CR1_WRITE,
+   TRAP_CR2_WRITE,
+   TRAP_CR3_WRITE,
+   TRAP_CR4_WRITE,
+   TRAP_CR5_WRITE,
+   TRAP_CR6_WRITE,
+   TRAP_CR7_WRITE,
+   TRAP_CR8_WRITE,
/* Byte offset 014h (word 5) */
INTERCEPT_INVLPGB = 160,
INTERCEPT_INVLPGB_ILLEGAL,
@@ -144,6 +154,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u8 reserved_6[8];   /* Offset 0xe8 */
u64 avic_logical_id;/* Offset 0xf0 */
u64 avic_physical_id;   /* Offset 0xf8 */
+   u8 reserved_7[8];
+   u64 vmsa_pa;/* Used for an SEV-ES guest */
 };
 
 
@@ -198,6 +210,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 
 #define SVM_NESTED_CTL_NP_ENABLE   BIT(0)
 #define SVM_NESTED_CTL_SEV_ENABLE  BIT(1)
+#define SVM_NESTED_CTL_SEV_ES_ENABLE   BIT(2)
 
 struct vmcb_seg {
u16 selector;
@@ -295,7 +308,7 @@ struct ghcb {
 
 
 #define EXPECTED_VMCB_SAVE_AREA_SIZE   1032
-#define EXPECTED_VMCB_CONTROL_AREA_SIZE256
+#define EXPECTED_VMCB_CONTROL_AREA_SIZE272
 #define EXPECTED_GHCB_SIZE PAGE_SIZE
 
 static inline void __unused_size_checks(void)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 477f6afe5e33..1798a2eefcdd 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1749,3 +1749,59 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, 
u8 vector)
ghcb_set_sw_exit_info_2(svm->ghcb, 1);
svm->ap_hlt_loop = false;
 }
+
+void sev_es_init_vmcb(struct vcpu_svm *svm)
+{
+   struct kvm_vcpu *vcpu = &svm->vcpu;
+
+   svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
+   svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
+
+   /*
+* An SEV-ES guest requires a VMSA area that is separate from the
+* VMCB page. Do not include the encryption mask on the VMSA physical
+* address since hardware will access it using the guest key.
+*/
+   svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
+
+   /* Can't intercept CR register access, HV can't modify CR registers */
+   svm_clr_intercept(svm, INTERCEPT_CR0_READ);
+   svm_clr_intercept(svm, INTERCEPT_CR4_READ);
+   svm_clr_intercept(svm, INTERCEPT_CR8_READ);
+   svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
+   svm_clr_intercept(svm, INTERCEPT_CR4_WRITE);
+   svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);
+
+   svm_clr_intercept(svm, INTERCEPT_SELECTIVE_CR0);
+
+   /* Track EFER/CR register changes */
+   svm_set_intercept(svm, TRAP_EFER_WRITE);
+   svm_set_intercept(svm, TRAP_CR0_WRITE);
+   svm_set_intercept(svm, TRAP_CR4_WRITE);
+   svm_set_intercept(svm, TRAP_CR8_WRITE);
+
+   /* No support for enable_vmware_backdoor */
+   clr_exception_intercept(svm, GP_VECTOR);
+
+   /* Can't intercept XSETBV, HV can't modify XCR0 directly */
+   svm_clr_intercept(svm, INTERCEPT_XSETBV);
+
+   /* Clear intercepts on selected MSRs */
+   set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
+   set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
+   set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
+   set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
+   set_msr_interception(vcpu, svm->msrp

[RFC PATCH v2 28/33] KVM: SVM: Set the encryption mask for the SVM host save area

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

The SVM host save area is used to restore some host state on VMEXIT of an
SEV-ES guest. After allocating the save area, clear it and add the
encryption mask to the SVM host save area physical address that is
programmed into the VM_HSAVE_PA MSR.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c | 1 -
 arch/x86/kvm/svm/svm.c | 3 ++-
 arch/x86/kvm/svm/svm.h | 2 ++
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index d30ceac85f88..4673bed1c923 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -31,7 +31,6 @@ unsigned int max_sev_asid;
 static unsigned int min_sev_asid;
 static unsigned long *sev_asid_bitmap;
 static unsigned long *sev_reclaim_asid_bitmap;
-#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
 
 struct enc_region {
struct list_head list;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 69ccffd0aef0..ff8f21ef2edb 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -478,7 +478,7 @@ static int svm_hardware_enable(void)
 
wrmsrl(MSR_EFER, efer | EFER_SVME);
 
-   wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
+   wrmsrl(MSR_VM_HSAVE_PA, __sme_page_pa(sd->save_area));
 
if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
@@ -546,6 +546,7 @@ static int svm_cpu_init(int cpu)
sd->save_area = alloc_page(GFP_KERNEL);
if (!sd->save_area)
goto free_cpu_data;
+   clear_page(page_address(sd->save_area));
 
if (svm_sev_enabled()) {
sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 0d011f68064c..75733163294f 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -21,6 +21,8 @@
 
 #include 
 
+#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
+
 static const u32 host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64
MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
-- 
2.28.0



[RFC PATCH v2 29/33] KVM: SVM: Update ASID allocation to support SEV-ES guests

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

SEV and SEV-ES guests each have dedicated ASID ranges. Update the ASID
allocation routine to return an ASID in the respective range.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c | 25 ++---
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 4673bed1c923..477f6afe5e33 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -62,19 +62,19 @@ static int sev_flush_asids(void)
 }
 
 /* Must be called with the sev_bitmap_lock held */
-static bool __sev_recycle_asids(void)
+static bool __sev_recycle_asids(int min_asid, int max_asid)
 {
int pos;
 
/* Check if there are any ASIDs to reclaim before performing a flush */
-   pos = find_next_bit(sev_reclaim_asid_bitmap,
-   max_sev_asid, min_sev_asid - 1);
-   if (pos >= max_sev_asid)
+   pos = find_next_bit(sev_reclaim_asid_bitmap, max_sev_asid, min_asid);
+   if (pos >= max_asid)
return false;
 
if (sev_flush_asids())
return false;
 
+   /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
   max_sev_asid);
bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
@@ -82,20 +82,23 @@ static bool __sev_recycle_asids(void)
return true;
 }
 
-static int sev_asid_new(void)
+static int sev_asid_new(struct kvm_sev_info *sev)
 {
+   int pos, min_asid, max_asid;
bool retry = true;
-   int pos;
 
mutex_lock(&sev_bitmap_lock);
 
/*
-* SEV-enabled guest must use asid from min_sev_asid to max_sev_asid.
+* SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
+* SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
 */
+   min_asid = sev->es_active ? 0 : min_sev_asid - 1;
+   max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
 again:
-   pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 
1);
-   if (pos >= max_sev_asid) {
-   if (retry && __sev_recycle_asids()) {
+   pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
+   if (pos >= max_asid) {
+   if (retry && __sev_recycle_asids(min_asid, max_asid)) {
retry = false;
goto again;
}
@@ -177,7 +180,7 @@ static int sev_guest_init(struct kvm *kvm, struct 
kvm_sev_cmd *argp)
if (unlikely(sev->active))
return ret;
 
-   asid = sev_asid_new();
+   asid = sev_asid_new(sev);
if (asid < 0)
return ret;
 
-- 
2.28.0



[RFC PATCH v2 21/33] KVM: SVM: Add support for CR4 write traps for an SEV-ES guest

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

For SEV-ES guests, the interception of control register write access
is not recommended. Control register interception occurs prior to the
control register being modified and the hypervisor is unable to modify
the control register itself because the register is located in the
encrypted register state.

SEV-ES guests introduce new control register write traps. These traps
provide intercept support of a control register write after the control
register has been modified. The new control register value is provided in
the VMCB EXITINFO1 field, allowing the hypervisor to track the setting
of the guest control registers.

Add support to track the value of the guest CR4 register using the control
register write trap so that the hypervisor understands the guest operating
mode.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/include/uapi/asm/svm.h |  1 +
 arch/x86/kvm/svm/svm.c  |  6 ++
 arch/x86/kvm/x86.c  | 31 ---
 4 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b021d992fa46..44da687855e0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1447,6 +1447,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 
tss_selector, int idt_index,
 int __kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long 
cr0);
 int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
+int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long 
cr4);
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
 int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 671b8b1ad9e1..423f242a7a8d 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -203,6 +203,7 @@
{ SVM_EXIT_XSETBV,  "xsetbv" }, \
{ SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \
{ SVM_EXIT_CR0_WRITE_TRAP,  "write_cr0_trap" }, \
+   { SVM_EXIT_CR4_WRITE_TRAP,  "write_cr4_trap" }, \
{ SVM_EXIT_INVPCID, "invpcid" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 913da18520a2..6b6cf071e656 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2493,6 +2493,11 @@ static int cr_trap(struct vcpu_svm *svm)
 
ret = __kvm_set_cr0(&svm->vcpu, old_value, new_value);
break;
+   case 4:
+   old_value = kvm_read_cr4(&svm->vcpu);
+
+   ret = __kvm_set_cr4(&svm->vcpu, old_value, new_value);
+   break;
default:
WARN(1, "unhandled CR%d write trap", cr);
ret = 1;
@@ -3083,6 +3088,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm 
*svm) = {
[SVM_EXIT_RDPRU]= rdpru_interception,
[SVM_EXIT_EFER_WRITE_TRAP]  = efer_trap,
[SVM_EXIT_CR0_WRITE_TRAP]   = cr_trap,
+   [SVM_EXIT_CR4_WRITE_TRAP]   = cr_trap,
[SVM_EXIT_INVPCID]  = invpcid_interception,
[SVM_EXIT_NPF]  = npf_interception,
[SVM_EXIT_RSM]  = rsm_interception,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a8b2d79eb2a3..90a551360207 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -984,6 +984,25 @@ int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 }
 EXPORT_SYMBOL_GPL(kvm_valid_cr4);
 
+int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long 
cr4)
+{
+   unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
+  X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
+
+   if (kvm_x86_ops.set_cr4(vcpu, cr4))
+   return 1;
+
+   if (((cr4 ^ old_cr4) & pdptr_bits) ||
+   (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
+   kvm_mmu_reset_context(vcpu);
+
+   if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
+   kvm_update_cpuid_runtime(vcpu);
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(__kvm_set_cr4);
+
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
unsigned long old_cr4 = kvm_read_cr4(vcpu);
@@ -1013,17 +1032,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
}
 
-   if (kvm_x86_ops.set_cr4(vcpu, cr4))
-   return 1;
-
-   if (((cr4 ^ old_cr4) & pdptr_bits

[RFC PATCH v2 22/33] KVM: SVM: Add support for CR8 write traps for an SEV-ES guest

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

For SEV-ES guests, the interception of control register write access
is not recommended. Control register interception occurs prior to the
control register being modified and the hypervisor is unable to modify
the control register itself because the register is located in the
encrypted register state.

SEV-ES guests introduce new control register write traps. These traps
provide intercept support of a control register write after the control
register has been modified. The new control register value is provided in
the VMCB EXITINFO1 field, allowing the hypervisor to track the setting
of the guest control registers.

Add support to track the value of the guest CR8 register using the control
register write trap so that the hypervisor understands the guest operating
mode.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/uapi/asm/svm.h | 1 +
 arch/x86/kvm/svm/svm.c  | 4 
 2 files changed, 5 insertions(+)

diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 423f242a7a8d..b486c02935ef 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -204,6 +204,7 @@
{ SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \
{ SVM_EXIT_CR0_WRITE_TRAP,  "write_cr0_trap" }, \
{ SVM_EXIT_CR4_WRITE_TRAP,  "write_cr4_trap" }, \
+   { SVM_EXIT_CR8_WRITE_TRAP,  "write_cr8_trap" }, \
{ SVM_EXIT_INVPCID, "invpcid" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6b6cf071e656..7082432db161 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2498,6 +2498,9 @@ static int cr_trap(struct vcpu_svm *svm)
 
ret = __kvm_set_cr4(&svm->vcpu, old_value, new_value);
break;
+   case 8:
+   ret = kvm_set_cr8(&svm->vcpu, new_value);
+   break;
default:
WARN(1, "unhandled CR%d write trap", cr);
ret = 1;
@@ -3089,6 +3092,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm 
*svm) = {
[SVM_EXIT_EFER_WRITE_TRAP]  = efer_trap,
[SVM_EXIT_CR0_WRITE_TRAP]   = cr_trap,
[SVM_EXIT_CR4_WRITE_TRAP]   = cr_trap,
+   [SVM_EXIT_CR8_WRITE_TRAP]   = cr_trap,
[SVM_EXIT_INVPCID]  = invpcid_interception,
[SVM_EXIT_NPF]  = npf_interception,
[SVM_EXIT_RSM]  = rsm_interception,
-- 
2.28.0



[RFC PATCH v2 24/33] KVM: SVM: Do not report support for SMM for an SEV-ES guest

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

SEV-ES guests do not currently support SMM. Update the has_emulated_msr()
kvm_x86_ops function to take a struct kvm parameter so that the capability
can be reported at a VM level.

Since this op is also called during KVM initialization and before a struct
kvm instance is available, comments will be added to each implementation
of has_emulated_msr() to indicate the kvm parameter can be null.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/svm/svm.c  | 11 ++-
 arch/x86/kvm/vmx/vmx.c  |  6 +-
 arch/x86/kvm/x86.c  |  4 ++--
 4 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 44da687855e0..30f1300a05c0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1062,7 +1062,7 @@ struct kvm_x86_ops {
void (*hardware_disable)(void);
void (*hardware_unsetup)(void);
bool (*cpu_has_accelerated_tpr)(void);
-   bool (*has_emulated_msr)(u32 index);
+   bool (*has_emulated_msr)(struct kvm *kvm, u32 index);
void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
 
unsigned int vm_size;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7082432db161..de44f7f2b7a8 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3934,12 +3934,21 @@ static bool svm_cpu_has_accelerated_tpr(void)
return false;
 }
 
-static bool svm_has_emulated_msr(u32 index)
+/*
+ * The kvm parameter can be NULL (module initialization, or invocation before
+ * VM creation). Be sure to check the kvm parameter before using it.
+ */
+static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
 {
switch (index) {
case MSR_IA32_MCG_EXT_CTL:
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
return false;
+   case MSR_IA32_SMBASE:
+   /* SEV-ES guests do not support SMM, so report false */
+   if (kvm && sev_es_guest(kvm))
+   return false;
+   break;
default:
break;
}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4551a7e80ebc..4fb4f488d4e1 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6372,7 +6372,11 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
handle_exception_nmi_irqoff(vmx);
 }
 
-static bool vmx_has_emulated_msr(u32 index)
+/*
+ * The kvm parameter can be NULL (module initialization, or invocation before
+ * VM creation). Be sure to check the kvm parameter before using it.
+ */
+static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
 {
switch (index) {
case MSR_IA32_SMBASE:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 39c8d9a311d4..20fabf578ab7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3690,7 +3690,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long 
ext)
 * fringe case that is not enabled except via specific settings
 * of the module parameters.
 */
-   r = kvm_x86_ops.has_emulated_msr(MSR_IA32_SMBASE);
+   r = kvm_x86_ops.has_emulated_msr(kvm, MSR_IA32_SMBASE);
break;
case KVM_CAP_VAPIC:
r = !kvm_x86_ops.cpu_has_accelerated_tpr();
@@ -5688,7 +5688,7 @@ static void kvm_init_msr_list(void)
}
 
for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
-   if (!kvm_x86_ops.has_emulated_msr(emulated_msrs_all[i]))
+   if (!kvm_x86_ops.has_emulated_msr(NULL, emulated_msrs_all[i]))
continue;
 
emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
-- 
2.28.0



[RFC PATCH v2 26/33] KVM: SVM: Add support for booting APs for an SEV-ES guest

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

Typically under KVM, an AP is booted using the INIT-SIPI-SIPI sequence,
where the guest vCPU register state is updated and then the vCPU is VMRUN
to begin execution of the AP. For an SEV-ES guest, this won't work because
the guest register state is encrypted.

Following the GHCB specification, the hypervisor must not alter the guest
register state, so KVM must track an AP/vCPU boot. Should the guest want
to park the AP, it must use the AP Reset Hold exit event in place of, for
example, a HLT loop.

First AP boot (first INIT-SIPI-SIPI sequence):
  Execute the AP (vCPU) as it was initialized and measured by the SEV-ES
  support. It is up to the guest to transfer control of the AP to the
  proper location.

Subsequent AP boot:
  KVM will expect to receive an AP Reset Hold exit event indicating that
  the vCPU is being parked and will require an INIT-SIPI-SIPI sequence to
  awaken it. When the AP Reset Hold exit event is received, KVM will place
  the vCPU into a simulated HLT mode. Upon receiving the INIT-SIPI-SIPI
  sequence, KVM will make the vCPU runnable. It is again up to the guest
  to then transfer control of the AP to the proper location.

The GHCB specification also requires the hypervisor to save the address of
an AP Jump Table so that, for example, vCPUs that have been parked by UEFI
can be started by the OS. Provide support for the AP Jump Table set/get
exit code.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/svm/sev.c  | 50 +
 arch/x86/kvm/svm/svm.c  |  7 +
 arch/x86/kvm/svm/svm.h  |  3 ++
 arch/x86/kvm/x86.c  |  9 ++
 5 files changed, 71 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d5ca8c6b0d5e..9218cb8a180a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1254,6 +1254,8 @@ struct kvm_x86_ops {
 
void (*migrate_timers)(struct kvm_vcpu *vcpu);
void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
+
+   void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index f6f1bb93f172..f771173021d8 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -16,6 +16,8 @@
 #include 
 #include 
 
+#include 
+
 #include "x86.h"
 #include "svm.h"
 #include "cpuid.h"
@@ -1359,6 +1361,8 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
if (!ghcb_sw_scratch_is_valid(ghcb))
goto vmgexit_err;
break;
+   case SVM_VMGEXIT_AP_HLT_LOOP:
+   case SVM_VMGEXIT_AP_JUMP_TABLE:
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
break;
default:
@@ -1674,6 +1678,35 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
control->exit_info_2,
svm->ghcb_sa);
break;
+   case SVM_VMGEXIT_AP_HLT_LOOP:
+   svm->ap_hlt_loop = true;
+   ret = kvm_emulate_halt(&svm->vcpu);
+   break;
+   case SVM_VMGEXIT_AP_JUMP_TABLE: {
+   struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+
+   switch (control->exit_info_1) {
+   case 0:
+   /* Set AP jump table address */
+   sev->ap_jump_table = control->exit_info_2;
+   break;
+   case 1:
+   /* Get AP jump table address */
+   ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
+   break;
+   default:
+   pr_err("svm: vmgexit: unsupported AP jump table request 
- exit_info_1=%#llx\n",
+  control->exit_info_1);
+   ghcb_set_sw_exit_info_1(ghcb, 1);
+   ghcb_set_sw_exit_info_2(ghcb,
+   X86_TRAP_UD |
+   SVM_EVTINJ_TYPE_EXEPT |
+   SVM_EVTINJ_VALID);
+   }
+
+   ret = 1;
+   break;
+   }
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
vcpu_unimpl(&svm->vcpu, "vmgexit: unsupported event - 
exit_info_1=%#llx, exit_info_2=%#llx\n",
control->exit_info_1, control->exit_info_2);
@@ -1693,3 +1726,20 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, 
unsigned int port, int in)
return kvm_sev_es_string_io(&svm->vcpu, size, port,
svm->ghcb_sa, svm->ghcb_sa_len, in);
 }
+
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+   struct

[RFC PATCH v2 27/33] KVM: SVM: Add NMI support for an SEV-ES guest

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

The GHCB specification defines how NMIs are to be handled for an SEV-ES
guest. To detect the completion of an NMI the hypervisor must not
intercept the IRET instruction (because a #VC while running the NMI will
issue an IRET) and, instead, must receive an NMI Complete exit event from
the guest.

Update the KVM support for detecting the completion of NMIs in the guest
to follow the GHCB specification. When an SEV-ES guest is active, the
IRET instruction will no longer be intercepted. Now, when the NMI Complete
exit event is received, the iret_interception() function will be called
to simulate the completion of the NMI.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c |  4 
 arch/x86/kvm/svm/svm.c | 20 +---
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index f771173021d8..d30ceac85f88 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1361,6 +1361,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
if (!ghcb_sw_scratch_is_valid(ghcb))
goto vmgexit_err;
break;
+   case SVM_VMGEXIT_NMI_COMPLETE:
case SVM_VMGEXIT_AP_HLT_LOOP:
case SVM_VMGEXIT_AP_JUMP_TABLE:
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
@@ -1678,6 +1679,9 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
control->exit_info_2,
svm->ghcb_sa);
break;
+   case SVM_VMGEXIT_NMI_COMPLETE:
+   ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET);
+   break;
case SVM_VMGEXIT_AP_HLT_LOOP:
svm->ap_hlt_loop = true;
ret = kvm_emulate_halt(&svm->vcpu);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 71be48f0113e..69ccffd0aef0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2353,9 +2353,11 @@ static int cpuid_interception(struct vcpu_svm *svm)
 static int iret_interception(struct vcpu_svm *svm)
 {
++svm->vcpu.stat.nmi_window_exits;
-   svm_clr_intercept(svm, INTERCEPT_IRET);
svm->vcpu.arch.hflags |= HF_IRET_MASK;
-   svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
+   if (!sev_es_guest(svm->vcpu.kvm)) {
+   svm_clr_intercept(svm, INTERCEPT_IRET);
+   svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
+   }
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
return 1;
 }
@@ -3362,7 +3364,8 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
 
svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
vcpu->arch.hflags |= HF_NMI_MASK;
-   svm_set_intercept(svm, INTERCEPT_IRET);
+   if (!sev_es_guest(svm->vcpu.kvm))
+   svm_set_intercept(svm, INTERCEPT_IRET);
++vcpu->stat.nmi_injections;
 }
 
@@ -3446,10 +3449,12 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, 
bool masked)
 
if (masked) {
svm->vcpu.arch.hflags |= HF_NMI_MASK;
-   svm_set_intercept(svm, INTERCEPT_IRET);
+   if (!sev_es_guest(svm->vcpu.kvm))
+   svm_set_intercept(svm, INTERCEPT_IRET);
} else {
svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
-   svm_clr_intercept(svm, INTERCEPT_IRET);
+   if (!sev_es_guest(svm->vcpu.kvm))
+   svm_clr_intercept(svm, INTERCEPT_IRET);
}
 }
 
@@ -3627,8 +3632,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
 * If we've made progress since setting HF_IRET_MASK, we've
 * executed an IRET and can allow NMI injection.
 */
-   if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
-   && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
+   if ((svm->vcpu.arch.hflags & HF_IRET_MASK) &&
+   (sev_es_guest(svm->vcpu.kvm) ||
+kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip)) {
svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
}
-- 
2.28.0



[RFC PATCH v2 23/33] KVM: x86: Update __get_sregs() / __set_sregs() to support SEV-ES

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

Since many of the registers used by the SEV-ES are encrypted and cannot
be read or written, adjust the __get_sregs() / __set_sregs() to take into
account whether the VMSA/guest state is encrypted.

For __get_sregs(), return the actual value that is in use by the guest
for all registers being tracked using the write trap support.

For __set_sregs(), skip setting of all guest registers values.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/x86.c | 27 ++-
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 90a551360207..39c8d9a311d4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9320,6 +9320,9 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct 
kvm_sregs *sregs)
 {
struct desc_ptr dt;
 
+   if (vcpu->arch.guest_state_protected)
+   goto skip_protected_regs;
+
kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -9337,9 +9340,11 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct 
kvm_sregs *sregs)
sregs->gdt.limit = dt.size;
sregs->gdt.base = dt.address;
 
-   sregs->cr0 = kvm_read_cr0(vcpu);
sregs->cr2 = vcpu->arch.cr2;
sregs->cr3 = kvm_read_cr3(vcpu);
+
+skip_protected_regs:
+   sregs->cr0 = kvm_read_cr0(vcpu);
sregs->cr4 = kvm_read_cr4(vcpu);
sregs->cr8 = kvm_get_cr8(vcpu);
sregs->efer = vcpu->arch.efer;
@@ -9478,6 +9483,9 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct 
kvm_sregs *sregs)
if (kvm_set_apic_base(vcpu, &apic_base_msr))
goto out;
 
+   if (vcpu->arch.guest_state_protected)
+   goto skip_protected_regs;
+
dt.size = sregs->idt.limit;
dt.address = sregs->idt.base;
kvm_x86_ops.set_idt(vcpu, &dt);
@@ -9516,14 +9524,6 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct 
kvm_sregs *sregs)
if (mmu_reset_needed)
kvm_mmu_reset_context(vcpu);
 
-   max_bits = KVM_NR_INTERRUPTS;
-   pending_vec = find_first_bit(
-   (const unsigned long *)sregs->interrupt_bitmap, max_bits);
-   if (pending_vec < max_bits) {
-   kvm_queue_interrupt(vcpu, pending_vec, false);
-   pr_debug("Set back pending irq %d\n", pending_vec);
-   }
-
kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -9542,6 +9542,15 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct 
kvm_sregs *sregs)
!is_protmode(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 
+skip_protected_regs:
+   max_bits = KVM_NR_INTERRUPTS;
+   pending_vec = find_first_bit(
+   (const unsigned long *)sregs->interrupt_bitmap, max_bits);
+   if (pending_vec < max_bits) {
+   kvm_queue_interrupt(vcpu, pending_vec, false);
+   pr_debug("Set back pending irq %d\n", pending_vec);
+   }
+
kvm_make_request(KVM_REQ_EVENT, vcpu);
 
ret = 0;
-- 
2.28.0



[RFC PATCH v2 25/33] KVM: SVM: Guest FPU state save/restore not needed for SEV-ES guest

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

The guest FPU state is automatically restored on VMRUN and saved on VMEXIT
by the hardware, so there is no reason to do this in KVM. Eliminate the
allocation of the guest_fpu save area and key off that to skip operations
related to the guest FPU state.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/svm/svm.c  | 10 ++
 arch/x86/kvm/x86.c  | 56 +++--
 3 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 30f1300a05c0..d5ca8c6b0d5e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1444,6 +1444,8 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, 
u8 vector);
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
int reason, bool has_error_code, u32 error_code);
 
+void kvm_free_guest_fpu(struct kvm_vcpu *vcpu);
+
 int __kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long 
cr0);
 int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index de44f7f2b7a8..13560b90b81a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1301,6 +1301,14 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!vmsa_page)
goto error_free_hsave_page;
+
+   /*
+* SEV-ES guests maintain an encrypted version of their FPU
+* state which is restored and saved on VMRUN and VMEXIT.
+* Free the fpu structure to prevent KVM from attempting to
+* access the FPU state.
+*/
+   kvm_free_guest_fpu(vcpu);
}
 
err = avic_init_vcpu(svm);
@@ -3792,6 +3800,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu 
*vcpu)
svm_set_dr6(svm, DR6_FIXED_1 | DR6_RTM);
 
clgi();
+
kvm_load_guest_xsave_state(vcpu);
 
kvm_wait_lapic_expire(vcpu);
@@ -3837,6 +3846,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu 
*vcpu)
kvm_before_interrupt(&svm->vcpu);
 
kvm_load_host_xsave_state(vcpu);
+
stgi();
 
/* Any pending NMI will happen here */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 20fabf578ab7..931a17ba5cbd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4407,6 +4407,9 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 struct kvm_xsave *guest_xsave)
 {
+   if (!vcpu->arch.guest_fpu)
+   return;
+
if (boot_cpu_has(X86_FEATURE_XSAVE)) {
memset(guest_xsave, 0, sizeof(struct kvm_xsave));
fill_xsave((u8 *) guest_xsave->region, vcpu);
@@ -4424,9 +4427,14 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu 
*vcpu,
 static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
 {
-   u64 xstate_bv =
-   *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
-   u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / 
sizeof(u32)];
+   u64 xstate_bv;
+   u32 mxcsr;
+
+   if (!vcpu->arch.guest_fpu)
+   return 0;
+
+   xstate_bv = *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / 
sizeof(u32)];
+   mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
 
if (boot_cpu_has(X86_FEATURE_XSAVE)) {
/*
@@ -9126,9 +9134,14 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
kvm_save_current_fpu(vcpu->arch.user_fpu);
 
-   /* PKRU is separately restored in kvm_x86_ops.run.  */
-   __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
-   ~XFEATURE_MASK_PKRU);
+   /*
+* Guests with protected state can't have it set by the hypervisor,
+* so skip trying to set it.
+*/
+   if (vcpu->arch.guest_fpu)
+   /* PKRU is separately restored in kvm_x86_ops.run. */
+   __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
+   ~XFEATURE_MASK_PKRU);
 
fpregs_mark_activate();
fpregs_unlock();
@@ -9141,7 +9154,12 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
fpregs_lock();
 
-   kvm_save_current_fpu(vcpu->arch.guest_fpu);
+   /*
+* Guests with protected state can't have it read by the hypervisor,
+* so skip trying to save it.
+*/
+   

[RFC PATCH v2 15/33] KVM: SVM: Add support for SEV-ES GHCB MSR protocol function 0x100

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

The GHCB specification defines a GHCB MSR protocol using the lower
12-bits of the GHCB MSR (in the hypervisor this corresponds to the
GHCB GPA field in the VMCB).

Function 0x100 is a request for termination of the guest. The guest has
encountered some situation for which it has requested to be terminated.
The GHCB MSR value contains the reason for the request.
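
For illustration only (not part of this patch), the request value can be
composed from the GHCB_MSR_TERM_* definitions added to svm.h below; a guest
builds this value, writes it to the GHCB MSR and issues VMGEXIT:

/* Sketch: compose a GHCB MSR termination request value (guest side). */
static u64 ghcb_msr_term_req(u64 reason_set, u64 reason_code)
{
	u64 msr_val = GHCB_MSR_TERM_REQ;	/* function 0x100 in bits 11:0 */

	msr_val |= (reason_set & GHCB_MSR_TERM_REASON_SET_MASK) <<
		   GHCB_MSR_TERM_REASON_SET_POS;
	msr_val |= (reason_code & GHCB_MSR_TERM_REASON_MASK) <<
		   GHCB_MSR_TERM_REASON_POS;

	return msr_val;
}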

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c | 13 +
 arch/x86/kvm/svm/svm.h |  6 ++
 2 files changed, 19 insertions(+)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index f890f2e1650e..cecdd6d83d9a 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1482,6 +1482,19 @@ static int sev_handle_vmgexit_msr_protocol(struct 
vcpu_svm *svm)
  GHCB_MSR_INFO_POS);
break;
}
+   case GHCB_MSR_TERM_REQ: {
+   u64 reason_set, reason_code;
+
+   reason_set = get_ghcb_msr_bits(svm,
+  GHCB_MSR_TERM_REASON_SET_MASK,
+  GHCB_MSR_TERM_REASON_SET_POS);
+   reason_code = get_ghcb_msr_bits(svm,
+   GHCB_MSR_TERM_REASON_MASK,
+   GHCB_MSR_TERM_REASON_POS);
+   pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
+   reason_set, reason_code);
+   fallthrough;
+   }
default:
ret = -EINVAL;
}
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 817fb3bd66c3..8a53de9b6d03 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -534,6 +534,12 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
 #define GHCB_MSR_CPUID_REG_POS 30
 #define GHCB_MSR_CPUID_REG_MASK    0x3
 
+#define GHCB_MSR_TERM_REQ  0x100
+#define GHCB_MSR_TERM_REASON_SET_POS   12
+#define GHCB_MSR_TERM_REASON_SET_MASK  0xf
+#define GHCB_MSR_TERM_REASON_POS   16
+#define GHCB_MSR_TERM_REASON_MASK  0xff
+
 extern unsigned int max_sev_asid;
 
 static inline bool svm_sev_enabled(void)
-- 
2.28.0



[RFC PATCH v2 12/33] KVM: SVM: Create trace events for VMGEXIT processing

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

Add trace events for entry to and exit from VMGEXIT processing. The vCPU
id and the exit reason will be common for the trace events. The exit info
fields will represent the input and output values for the entry and exit
events, respectively.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c |  6 +
 arch/x86/kvm/trace.h   | 53 ++
 arch/x86/kvm/x86.c |  2 ++
 3 files changed, 61 insertions(+)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 06a7b63641af..500c845f4979 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -14,10 +14,12 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "x86.h"
 #include "svm.h"
 #include "cpuid.h"
+#include "trace.h"
 
 static int sev_flush_asids(void);
 static DECLARE_RWSEM(sev_deactivate_lock);
@@ -1372,6 +1374,8 @@ static void pre_sev_es_run(struct vcpu_svm *svm)
if (!svm->ghcb)
return;
 
+   trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);
+
sev_es_sync_to_ghcb(svm);
 
kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
@@ -1436,6 +1440,8 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
svm->ghcb = svm->ghcb_map.hva;
ghcb = svm->ghcb_map.hva;
 
+   trace_kvm_vmgexit_enter(svm->vcpu.vcpu_id, ghcb);
+
exit_code = ghcb_get_sw_exit_code(ghcb);
 
ret = sev_es_validate_vmgexit(svm);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index aef960f90f26..7da931a511c9 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1578,6 +1578,59 @@ TRACE_EVENT(kvm_hv_syndbg_get_msr,
  __entry->vcpu_id, __entry->vp_index, __entry->msr,
  __entry->data)
 );
+
+/*
+ * Tracepoint for the start of VMGEXIT processing
+ */
+TRACE_EVENT(kvm_vmgexit_enter,
+   TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb),
+   TP_ARGS(vcpu_id, ghcb),
+
+   TP_STRUCT__entry(
+   __field(unsigned int, vcpu_id)
+   __field(u64, exit_reason)
+   __field(u64, info1)
+   __field(u64, info2)
+   ),
+
+   TP_fast_assign(
+   __entry->vcpu_id = vcpu_id;
+   __entry->exit_reason = ghcb->save.sw_exit_code;
+   __entry->info1   = ghcb->save.sw_exit_info_1;
+   __entry->info2   = ghcb->save.sw_exit_info_2;
+   ),
+
+   TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx",
+ __entry->vcpu_id, __entry->exit_reason,
+ __entry->info1, __entry->info2)
+);
+
+/*
+ * Tracepoint for the end of VMGEXIT processing
+ */
+TRACE_EVENT(kvm_vmgexit_exit,
+   TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb),
+   TP_ARGS(vcpu_id, ghcb),
+
+   TP_STRUCT__entry(
+   __field(unsigned int, vcpu_id)
+   __field(u64, exit_reason)
+   __field(u64, info1)
+   __field(u64, info2)
+   ),
+
+   TP_fast_assign(
+   __entry->vcpu_id = vcpu_id;
+   __entry->exit_reason = ghcb->save.sw_exit_code;
+   __entry->info1   = ghcb->save.sw_exit_info_1;
+   __entry->info2   = ghcb->save.sw_exit_info_2;
+   ),
+
+   TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx",
+ __entry->vcpu_id, __entry->exit_reason,
+ __entry->info1, __entry->info2)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b9125f49ddc..bc73ae18b90c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11174,3 +11174,5 @@ 
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
-- 
2.28.0



[RFC PATCH v2 16/33] KVM: SVM: Create trace events for VMGEXIT MSR protocol processing

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

Add trace events for entry to and exit from VMGEXIT MSR protocol
processing. The vCPU id will be common for the trace events. The MSR
protocol processing is guided by the GHCB GPA in the VMCB, so the GHCB
GPA will represent the input and output values for the entry and exit
events, respectively. Additionally, the exit event will contain the
return code for the event.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c |  6 ++
 arch/x86/kvm/trace.h   | 44 ++
 arch/x86/kvm/x86.c |  2 ++
 3 files changed, 52 insertions(+)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index cecdd6d83d9a..4a4245b34bee 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1438,6 +1438,9 @@ static int sev_handle_vmgexit_msr_protocol(struct 
vcpu_svm *svm)
 
ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;
 
+   trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id,
+control->ghcb_gpa);
+
switch (ghcb_info) {
case GHCB_MSR_SEV_INFO_REQ:
set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
@@ -1499,6 +1502,9 @@ static int sev_handle_vmgexit_msr_protocol(struct 
vcpu_svm *svm)
ret = -EINVAL;
}
 
+   trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,
+   control->ghcb_gpa, ret);
+
return ret;
 }
 
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 7da931a511c9..2de30c20bc26 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1631,6 +1631,50 @@ TRACE_EVENT(kvm_vmgexit_exit,
  __entry->info1, __entry->info2)
 );
 
+/*
+ * Tracepoint for the start of VMGEXIT MSR protocol processing
+ */
+TRACE_EVENT(kvm_vmgexit_msr_protocol_enter,
+   TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa),
+   TP_ARGS(vcpu_id, ghcb_gpa),
+
+   TP_STRUCT__entry(
+   __field(unsigned int, vcpu_id)
+   __field(u64, ghcb_gpa)
+   ),
+
+   TP_fast_assign(
+   __entry->vcpu_id  = vcpu_id;
+   __entry->ghcb_gpa = ghcb_gpa;
+   ),
+
+   TP_printk("vcpu %u, ghcb_gpa %016llx",
+ __entry->vcpu_id, __entry->ghcb_gpa)
+);
+
+/*
+ * Tracepoint for the end of VMGEXIT MSR protocol processing
+ */
+TRACE_EVENT(kvm_vmgexit_msr_protocol_exit,
+   TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa, int result),
+   TP_ARGS(vcpu_id, ghcb_gpa, result),
+
+   TP_STRUCT__entry(
+   __field(unsigned int, vcpu_id)
+   __field(u64, ghcb_gpa)
+   __field(int, result)
+   ),
+
+   TP_fast_assign(
+   __entry->vcpu_id  = vcpu_id;
+   __entry->ghcb_gpa = ghcb_gpa;
+   __entry->result   = result;
+   ),
+
+   TP_printk("vcpu %u, ghcb_gpa %016llx, result %d",
+ __entry->vcpu_id, __entry->ghcb_gpa, __entry->result)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bc73ae18b90c..61fda131d919 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11176,3 +11176,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
-- 
2.28.0



[RFC PATCH v2 20/33] KVM: SVM: Add support for CR0 write traps for an SEV-ES guest

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

For SEV-ES guests, the interception of control register write access
is not recommended. Control register interception occurs prior to the
control register being modified and the hypervisor is unable to modify
the control register itself because the register is located in the
encrypted register state.

SEV-ES support introduces new control register write traps. These traps
provide intercept support of a control register write after the control
register has been modified. The new control register value is provided in
the VMCB EXITINFO1 field, allowing the hypervisor to track the setting
of the guest control registers.

Add support to track the value of the guest CR0 register using the control
register write trap so that the hypervisor understands the guest operating
mode.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/include/uapi/asm/svm.h | 17 ++
 arch/x86/kvm/svm/svm.c  | 24 +++
 arch/x86/kvm/x86.c  | 41 +++--
 4 files changed, 66 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7e7ae3b85663..b021d992fa46 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1444,6 +1444,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, 
u8 vector);
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
int reason, bool has_error_code, u32 error_code);
 
+int __kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long 
cr0);
 int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 73ff94a28911..671b8b1ad9e1 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -78,6 +78,22 @@
 #define SVM_EXIT_XSETBV0x08d
 #define SVM_EXIT_RDPRU 0x08e
 #define SVM_EXIT_EFER_WRITE_TRAP   0x08f
+#define SVM_EXIT_CR0_WRITE_TRAP    0x090
+#define SVM_EXIT_CR1_WRITE_TRAP    0x091
+#define SVM_EXIT_CR2_WRITE_TRAP    0x092
+#define SVM_EXIT_CR3_WRITE_TRAP    0x093
+#define SVM_EXIT_CR4_WRITE_TRAP    0x094
+#define SVM_EXIT_CR5_WRITE_TRAP    0x095
+#define SVM_EXIT_CR6_WRITE_TRAP    0x096
+#define SVM_EXIT_CR7_WRITE_TRAP    0x097
+#define SVM_EXIT_CR8_WRITE_TRAP    0x098
+#define SVM_EXIT_CR9_WRITE_TRAP    0x099
+#define SVM_EXIT_CR10_WRITE_TRAP   0x09a
+#define SVM_EXIT_CR11_WRITE_TRAP   0x09b
+#define SVM_EXIT_CR12_WRITE_TRAP   0x09c
+#define SVM_EXIT_CR13_WRITE_TRAP   0x09d
+#define SVM_EXIT_CR14_WRITE_TRAP   0x09e
+#define SVM_EXIT_CR15_WRITE_TRAP   0x09f
 #define SVM_EXIT_INVPCID   0x0a2
 #define SVM_EXIT_NPF   0x400
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI   0x401
@@ -186,6 +202,7 @@
{ SVM_EXIT_MWAIT,   "mwait" }, \
{ SVM_EXIT_XSETBV,  "xsetbv" }, \
{ SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \
+   { SVM_EXIT_CR0_WRITE_TRAP,  "write_cr0_trap" }, \
{ SVM_EXIT_INVPCID, "invpcid" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f5bf40c7ba74..913da18520a2 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2478,6 +2478,29 @@ static int cr_interception(struct vcpu_svm *svm)
return kvm_complete_insn_gp(&svm->vcpu, err);
 }
 
+static int cr_trap(struct vcpu_svm *svm)
+{
+   unsigned long old_value, new_value;
+   unsigned int cr;
+   int ret;
+
+   new_value = (unsigned long)svm->vmcb->control.exit_info_1;
+
+   cr = svm->vmcb->control.exit_code - SVM_EXIT_CR0_WRITE_TRAP;
+   switch (cr) {
+   case 0:
+   old_value = kvm_read_cr0(&svm->vcpu);
+
+   ret = __kvm_set_cr0(&svm->vcpu, old_value, new_value);
+   break;
+   default:
+   WARN(1, "unhandled CR%d write trap", cr);
+   ret = 1;
+   }
+
+   return kvm_complete_insn_gp(&svm->vcpu, ret);
+}
+
 static int dr_interception(struct vcpu_svm *svm)
 {
int reg, dr;
@@ -3059,6 +3082,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm 
*svm) = {
[SVM_EXIT_XSETBV]   = xsetbv_interception,
[SVM_EXIT_RDPRU]= rdpru_interception

[RFC PATCH v2 11/33] KVM: SVM: Add initial support for a VMGEXIT VMEXIT

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

SEV-ES adds a new VMEXIT reason code, VMGEXIT. Initial support for a
VMGEXIT includes mapping the GHCB based on the guest GPA, which is
obtained from a new VMCB field, and then validating the required inputs
for the VMGEXIT exit reason.

Since many of the VMGEXIT exit reasons correspond to existing VMEXIT
reasons, the information from the GHCB is copied into the VMCB control
exit code areas and KVM register areas. The standard exit handlers are
invoked, similar to standard VMEXIT processing. Before restarting the
vCPU, the GHCB is updated with any registers that have been updated by
the hypervisor.
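
A condensed outline of that flow, as a sketch rather than the literal patch
code (sev_es_sync_from_ghcb() is an assumed name for the step that copies
GHCB state into the VMCB/KVM register areas; the other helpers are the ones
added in this series):

/* Sketch of the VMGEXIT handling sequence described above. */
static int vmgexit_flow_sketch(struct vcpu_svm *svm)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	u64 exit_code;
	int ret;

	/* 1. Map the GHCB page using the GPA from the new VMCB field. */
	if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(control->ghcb_gpa),
			 &svm->ghcb_map))
		return -EINVAL;
	svm->ghcb = svm->ghcb_map.hva;

	/* 2. Validate the inputs required for the requested exit code. */
	ret = sev_es_validate_vmgexit(svm);
	if (ret)
		return ret;

	/* 3. Copy GHCB exit code/info and GPRs into VMCB/KVM state. */
	exit_code = ghcb_get_sw_exit_code(svm->ghcb);
	sev_es_sync_from_ghcb(svm);

	/* 4. Dispatch to the standard exit handler for that exit code. */
	return svm_invoke_exit_handler(svm, exit_code);
}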

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/svm.h  |   2 +-
 arch/x86/include/uapi/asm/svm.h |   7 +
 arch/x86/kvm/cpuid.c|   1 +
 arch/x86/kvm/svm/sev.c  | 247 
 arch/x86/kvm/svm/svm.c  |   8 +-
 arch/x86/kvm/svm/svm.h  |   8 ++
 6 files changed, 270 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index bce28482d63d..caa8628f5fba 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -130,7 +130,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u32 exit_int_info_err;
u64 nested_ctl;
u64 avic_vapic_bar;
-   u8 reserved_4[8];
+   u64 ghcb_gpa;
u32 event_inj;
u32 event_inj_err;
u64 nested_cr3;
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index f5f826ab4e3f..3a730238a646 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -81,6 +81,7 @@
 #define SVM_EXIT_NPF   0x400
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI   0x401
 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
+#define SVM_EXIT_VMGEXIT   0x403
 
 /* SEV-ES software-defined VMGEXIT events */
 #define SVM_VMGEXIT_MMIO_READ  0x80000001
@@ -187,6 +188,12 @@
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS,   "avic_unaccelerated_access" }, \
+   { SVM_EXIT_VMGEXIT, "vmgexit" }, \
+   { SVM_VMGEXIT_MMIO_READ,"vmgexit_mmio_read" }, \
+   { SVM_VMGEXIT_MMIO_WRITE,   "vmgexit_mmio_write" }, \
+   { SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \
+   { SVM_VMGEXIT_AP_HLT_LOOP,  "vmgexit_ap_hlt_loop" }, \
+   { SVM_VMGEXIT_AP_JUMP_TABLE,"vmgexit_ap_jump_table" }, \
{ SVM_EXIT_ERR, "invalid_guest_state" }
 
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 37c3668a774f..e2f303a332c1 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -115,6 +115,7 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
   MSR_IA32_MISC_ENABLE_MWAIT);
}
 }
+EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
 
 static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 9af8369450b2..06a7b63641af 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -17,6 +17,7 @@
 
 #include "x86.h"
 #include "svm.h"
+#include "cpuid.h"
 
 static int sev_flush_asids(void);
 static DECLARE_RWSEM(sev_deactivate_lock);
@@ -1189,11 +1190,202 @@ void sev_hardware_teardown(void)
sev_flush_asids();
 }
 
+static void dump_ghcb(struct ghcb *ghcb)
+{
+   unsigned int nbits;
+
+   /* Re-use the dump_invalid_vmcb module parameter */
+   if (!dump_invalid_vmcb) {
+   pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump 
internal KVM state.\n");
+   return;
+   }
+
+   nbits = sizeof(ghcb->save.valid_bitmap) * 8;
+
+   pr_err("GHCB:\n");
+   pr_err("%-20s%08llx\n", "sw_exit_code", ghcb->save.sw_exit_code);
+   pr_err("%-20s%08llx\n", "sw_exit_info_1", ghcb->save.sw_exit_info_1);
+   pr_err("%-20s%08llx\n", "sw_exit_info_2", ghcb->save.sw_exit_info_2);
+   pr_err("%-20s%08llx\n", "sw_scratch", ghcb->save.sw_scratch);
+   pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
+}
+
+static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
+{
+   struct kvm_vcpu *vcpu = &svm->vcpu;
+   struct ghcb *ghcb = svm->ghcb;
+
+   /*
+* The GHCB protocol so far allows for the following data
+* to be returned:
+*   GPRs RAX, RBX, RCX, RDX
+*
+* Copy their values to the GHCB if they are dirty.
+*/
+   if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX))
+   ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);

[RFC PATCH v2 19/33] KVM: SVM: Add support for EFER write traps for an SEV-ES guest

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

For SEV-ES guests, the interception of EFER write access is not
recommended. EFER interception occurs prior to EFER being modified and
the hypervisor is unable to modify EFER itself because the register is
located in the encrypted register state.

SEV-ES support introduces a new EFER write trap. This trap provides
intercept support of an EFER write after it has been modified. The new
EFER value is provided in the VMCB EXITINFO1 field, allowing the
hypervisor to track the setting of the guest EFER.

Add support to track the value of the guest EFER value using the EFER
write trap so that the hypervisor understands the guest operating mode.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/uapi/asm/svm.h |  2 ++
 arch/x86/kvm/svm/svm.c  | 20 
 2 files changed, 22 insertions(+)

diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 3a730238a646..73ff94a28911 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -77,6 +77,7 @@
 #define SVM_EXIT_MWAIT_COND    0x08c
 #define SVM_EXIT_XSETBV    0x08d
 #define SVM_EXIT_RDPRU 0x08e
+#define SVM_EXIT_EFER_WRITE_TRAP   0x08f
 #define SVM_EXIT_INVPCID   0x0a2
 #define SVM_EXIT_NPF   0x400
 #define SVM_EXIT_AVIC_INCOMPLETE_IPI   0x401
@@ -184,6 +185,7 @@
{ SVM_EXIT_MONITOR, "monitor" }, \
{ SVM_EXIT_MWAIT,   "mwait" }, \
{ SVM_EXIT_XSETBV,  "xsetbv" }, \
+   { SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \
{ SVM_EXIT_INVPCID, "invpcid" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 14285bb832de..f5bf40c7ba74 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2531,6 +2531,25 @@ static int cr8_write_interception(struct vcpu_svm *svm)
return 0;
 }
 
+static int efer_trap(struct vcpu_svm *svm)
+{
+   struct msr_data msr_info;
+   int ret;
+
+   /*
+* Clear the EFER_SVME bit from EFER. The SVM code always sets this
+* bit in svm_set_efer(), but __kvm_valid_efer() checks it against
+* whether the guest has X86_FEATURE_SVM - this avoids a failure if
+* the guest doesn't have X86_FEATURE_SVM.
+*/
+   msr_info.host_initiated = false;
+   msr_info.index = MSR_EFER;
+   msr_info.data = svm->vmcb->control.exit_info_1 & ~EFER_SVME;
+   ret = kvm_set_msr_common(&svm->vcpu, &msr_info);
+
+   return kvm_complete_insn_gp(&svm->vcpu, ret);
+}
+
 static int svm_get_msr_feature(struct kvm_msr_entry *msr)
 {
msr->data = 0;
@@ -3039,6 +3058,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm 
*svm) = {
[SVM_EXIT_MWAIT]= mwait_interception,
[SVM_EXIT_XSETBV]   = xsetbv_interception,
[SVM_EXIT_RDPRU]= rdpru_interception,
+   [SVM_EXIT_EFER_WRITE_TRAP]  = efer_trap,
[SVM_EXIT_INVPCID]  = invpcid_interception,
[SVM_EXIT_NPF]  = npf_interception,
[SVM_EXIT_RSM]  = rsm_interception,
-- 
2.28.0



[RFC PATCH v2 18/33] KVM: SVM: Support port IO operations for an SEV-ES guest

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

For an SEV-ES guest, string-based port IO is performed to a shared
(un-encrypted) page so that both the hypervisor and guest can read or
write to it and each see the contents.

For string-based port IO operations, invoke SEV-ES specific routines that
can complete the operation using common KVM port IO support.
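
A condensed sketch of the common helper referenced above (illustrative only;
kvm_sev_es_ins() is an assumed counterpart to the kvm_sev_es_outs() helper
shown in the hunk below):

/*
 * Sketch only: for OUT the guest already placed the data in the shared
 * buffer and it can be emitted immediately; for IN the port read is
 * emulated and the result is copied back to the shared buffer by a
 * completion callback once it is available.
 */
static int kvm_sev_es_string_io_sketch(struct kvm_vcpu *vcpu, unsigned int size,
				       unsigned int port, void *data,
				       unsigned int count, int in)
{
	return in ? kvm_sev_es_ins(vcpu, size, port, data, count)
		  : kvm_sev_es_outs(vcpu, size, port, data, count);
}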

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm/sev.c  | 13 +
 arch/x86/kvm/svm/svm.c  | 11 +--
 arch/x86/kvm/svm/svm.h  |  1 +
 arch/x86/kvm/x86.c  | 51 +
 arch/x86/kvm/x86.h  |  3 ++
 6 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 355fef2cd4e2..7e7ae3b85663 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -614,6 +614,7 @@ struct kvm_vcpu_arch {
 
struct kvm_pio_request pio;
void *pio_data;
+   void *guest_ins_data;
 
u8 event_exit_inst_len;
 
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 1d287f5cffac..f6f1bb93f172 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1320,6 +1320,10 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
if (!ghcb_rax_is_valid(ghcb))
goto vmgexit_err;
+
+   if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK)
+   if (!ghcb_sw_scratch_is_valid(ghcb))
+   goto vmgexit_err;
break;
case SVM_EXIT_MSR:
if (!ghcb_rcx_is_valid(ghcb))
@@ -1680,3 +1684,12 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
 
return ret;
 }
+
+int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
+{
+   if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2))
+   return -EINVAL;
+
+   return kvm_sev_es_string_io(&svm->vcpu, size, port,
+   svm->ghcb_sa, svm->ghcb_sa_len, in);
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index ac5288a14f18..14285bb832de 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2072,11 +2072,16 @@ static int io_interception(struct vcpu_svm *svm)
++svm->vcpu.stat.io_exits;
string = (io_info & SVM_IOIO_STR_MASK) != 0;
in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
-   if (string)
-   return kvm_emulate_instruction(vcpu, 0);
-
port = io_info >> 16;
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
+
+   if (string) {
+   if (sev_es_guest(vcpu->kvm))
+   return sev_es_string_io(svm, size, port, in);
+   else
+   return kvm_emulate_instruction(vcpu, 0);
+   }
+
svm->next_rip = svm->vmcb->control.exit_info_2;
 
return kvm_fast_pio(&svm->vcpu, size, port, in);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 386b6b21d93a..084ba4dfd9e2 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -563,5 +563,6 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu);
 void __init sev_hardware_setup(void);
 void sev_hardware_teardown(void);
 int sev_handle_vmgexit(struct vcpu_svm *svm);
+int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int 
in);
 
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 762f57ca059f..a5e747f80865 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10643,6 +10643,10 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
 
 unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
 {
+   /* Can't read the RIP when guest state is protected, just return 0 */
+   if (vcpu->arch.guest_state_protected)
+   return 0;
+
if (is_64_bit_mode(vcpu))
return kvm_rip_read(vcpu);
return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
@@ -11275,6 +11279,53 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t 
gpa, unsigned int bytes,
 }
 EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read);
 
+static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
+{
+   memcpy(vcpu->arch.guest_ins_data, vcpu->arch.pio_data,
+  vcpu->arch.pio.count * vcpu->arch.pio.size);
+   vcpu->arch.pio.count = 0;
+
+   return 1;
+}
+
+static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
+  unsigned int port, void *data,  unsigned int count)
+{
+   int ret;
+
+   ret = emulator_pio_out_emulated(vcpu->arch.emulate_ctxt, size, port,
+   data, count);
+   vcpu->arch.pio.count = 0;
+
+   return 0;
+}
+
+static int kvm_sev_es

[RFC PATCH v2 17/33] KVM: SVM: Support MMIO for an SEV-ES guest

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

For an SEV-ES guest, MMIO is performed to a shared (un-encrypted) page
so that both the hypervisor and guest can read or write to it and each
see the contents.

The GHCB specification provides software-defined VMGEXIT exit codes to
indicate a request for an MMIO read or an MMIO write. Add support to
recognize the MMIO requests and invoke SEV-ES specific routines that
can complete the MMIO operation. These routines use common KVM support
to complete the MMIO operation.
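
As a sketch of the read side (not the literal patch code; the use of
exit_info_1 for the MMIO GPA and exit_info_2 for the length is an assumption
based on the GHCB usage described above):

/* Sketch only: servicing an MMIO read VMGEXIT via the scratch buffer. */
static int mmio_read_vmgexit_sketch(struct vcpu_svm *svm)
{
	struct vmcb_control_area *control = &svm->vmcb->control;

	/* The data read from MMIO is returned through the shared scratch area. */
	if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))
		return -EINVAL;

	return kvm_sev_es_mmio_read(&svm->vcpu, control->exit_info_1,
				    control->exit_info_2, svm->ghcb_sa);
}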

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c | 121 
 arch/x86/kvm/svm/svm.c |   3 +
 arch/x86/kvm/svm/svm.h |   6 ++
 arch/x86/kvm/x86.c | 123 +
 arch/x86/kvm/x86.h |   5 ++
 5 files changed, 258 insertions(+)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 4a4245b34bee..1d287f5cffac 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1350,6 +1350,11 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
!ghcb_rcx_is_valid(ghcb))
goto vmgexit_err;
break;
+   case SVM_VMGEXIT_MMIO_READ:
+   case SVM_VMGEXIT_MMIO_WRITE:
+   if (!ghcb_sw_scratch_is_valid(ghcb))
+   goto vmgexit_err;
+   break;
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
break;
default:
@@ -1378,6 +1383,24 @@ static void pre_sev_es_run(struct vcpu_svm *svm)
if (!svm->ghcb)
return;
 
+   if (svm->ghcb_sa_free) {
+   /*
+* The scratch area lives outside the GHCB, so there is a
+* buffer that, depending on the operation performed, may
+* need to be synced, then freed.
+*/
+   if (svm->ghcb_sa_sync) {
+   kvm_write_guest(svm->vcpu.kvm,
+   ghcb_get_sw_scratch(svm->ghcb),
+   svm->ghcb_sa, svm->ghcb_sa_len);
+   svm->ghcb_sa_sync = false;
+   }
+
+   kfree(svm->ghcb_sa);
+   svm->ghcb_sa = NULL;
+   svm->ghcb_sa_free = false;
+   }
+
trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);
 
sev_es_sync_to_ghcb(svm);
@@ -1412,6 +1435,86 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
 }
 
+#define GHCB_SCRATCH_AREA_LIMIT    (16ULL * PAGE_SIZE)
+static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
+{
+   struct vmcb_control_area *control = &svm->vmcb->control;
+   struct ghcb *ghcb = svm->ghcb;
+   u64 ghcb_scratch_beg, ghcb_scratch_end;
+   u64 scratch_gpa_beg, scratch_gpa_end;
+   void *scratch_va;
+
+   scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
+   if (!scratch_gpa_beg) {
+   pr_err("vmgexit: scratch gpa not provided\n");
+   return false;
+   }
+
+   scratch_gpa_end = scratch_gpa_beg + len;
+   if (scratch_gpa_end < scratch_gpa_beg) {
+   pr_err("vmgexit: scratch length (%#llx) not valid for scratch 
address (%#llx)\n",
+  len, scratch_gpa_beg);
+   return false;
+   }
+
+   if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
+   /* Scratch area begins within GHCB */
+   ghcb_scratch_beg = control->ghcb_gpa +
+  offsetof(struct ghcb, shared_buffer);
+   ghcb_scratch_end = control->ghcb_gpa +
+  offsetof(struct ghcb, reserved_1);
+
+   /*
+* If the scratch area begins within the GHCB, it must be
+* completely contained in the GHCB shared buffer area.
+*/
+   if (scratch_gpa_beg < ghcb_scratch_beg ||
+   scratch_gpa_end > ghcb_scratch_end) {
+   pr_err("vmgexit: scratch area is outside of GHCB shared 
buffer area (%#llx - %#llx)\n",
+  scratch_gpa_beg, scratch_gpa_end);
+   return false;
+   }
+
+   scratch_va = (void *)svm->ghcb;
+   scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
+   } else {
+   /*
+* The guest memory must be read into a kernel buffer, so
+* limit the size
+*/
+   if (len > GHCB_SCRATCH_AREA_LIMIT) {
+   pr_err("vmgexit: scratch area exceeds KVM limits (%#llx 
requested, %#llx limit)\n",
+  len, GHCB_SCRATCH_AREA_LIMIT);
+   return false;
+   }
+   scratch_va = kzalloc(len, GFP_KERN

[RFC PATCH v2 14/33] KVM: SVM: Add support for SEV-ES GHCB MSR protocol function 0x004

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

The GHCB specification defines a GHCB MSR protocol using the lower
12-bits of the GHCB MSR (in the hypervisor this corresponds to the
GHCB GPA field in the VMCB).

Function 0x004 is a request for CPUID information. Only a single CPUID
result register can be sent per invocation, so the protocol defines the
register that is requested. The GHCB MSR value is set to the CPUID
register value as per the specification via the VMCB GHCB GPA field.
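
As a guest-side illustration (not part of this hypervisor patch), a request
value can be composed from the GHCB_MSR_CPUID_* definitions added to svm.h
below; reg selects EAX/EBX/ECX/EDX as 0/1/2/3:

/* Sketch only: build the GHCB MSR value for a CPUID request. */
static u64 ghcb_msr_cpuid_req(u32 fn, u32 reg)
{
	return ((u64)fn << GHCB_MSR_CPUID_FUNC_POS) |
	       (((u64)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) |
	       GHCB_MSR_CPUID_REQ;
}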

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c | 56 --
 arch/x86/kvm/svm/svm.h |  9 +++
 2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index fb0410fd2f68..f890f2e1650e 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1412,6 +1412,18 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
 }
 
+static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
+ unsigned int pos)
+{
+   svm->vmcb->control.ghcb_gpa &= ~(mask << pos);
+   svm->vmcb->control.ghcb_gpa |= (value & mask) << pos;
+}
+
+static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos)
+{
+   return (svm->vmcb->control.ghcb_gpa >> pos) & mask;
+}
+
 static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
 {
svm->vmcb->control.ghcb_gpa = value;
@@ -1420,7 +1432,9 @@ static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
 static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
 {
struct vmcb_control_area *control = &svm->vmcb->control;
+   struct kvm_vcpu *vcpu = &svm->vcpu;
u64 ghcb_info;
+   int ret = 1;
 
ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;
 
@@ -1430,11 +1444,49 @@ static int sev_handle_vmgexit_msr_protocol(struct 
vcpu_svm *svm)
GHCB_VERSION_MIN,
sev_enc_bit));
break;
+   case GHCB_MSR_CPUID_REQ: {
+   u64 cpuid_fn, cpuid_reg, cpuid_value;
+
+   cpuid_fn = get_ghcb_msr_bits(svm,
+GHCB_MSR_CPUID_FUNC_MASK,
+GHCB_MSR_CPUID_FUNC_POS);
+
+   /* Initialize the registers needed by the CPUID intercept */
+   vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
+   vcpu->arch.regs[VCPU_REGS_RCX] = 0;
+
+   ret = svm_invoke_exit_handler(svm, SVM_EXIT_CPUID);
+   if (!ret) {
+   ret = -EINVAL;
+   break;
+   }
+
+   cpuid_reg = get_ghcb_msr_bits(svm,
+ GHCB_MSR_CPUID_REG_MASK,
+ GHCB_MSR_CPUID_REG_POS);
+   if (cpuid_reg == 0)
+   cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX];
+   else if (cpuid_reg == 1)
+   cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX];
+   else if (cpuid_reg == 2)
+   cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX];
+   else
+   cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX];
+
+   set_ghcb_msr_bits(svm, cpuid_value,
+ GHCB_MSR_CPUID_VALUE_MASK,
+ GHCB_MSR_CPUID_VALUE_POS);
+
+   set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP,
+ GHCB_MSR_INFO_MASK,
+ GHCB_MSR_INFO_POS);
+   break;
+   }
default:
-   return -EINVAL;
+   ret = -EINVAL;
}
 
-   return 1;
+   return ret;
 }
 
 int sev_handle_vmgexit(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 487fdc0c986b..817fb3bd66c3 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -525,6 +525,15 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
 (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) |\
 GHCB_MSR_SEV_INFO_RESP)
 
+#define GHCB_MSR_CPUID_REQ 0x004
+#define GHCB_MSR_CPUID_RESP    0x005
+#define GHCB_MSR_CPUID_FUNC_POS    32
+#define GHCB_MSR_CPUID_FUNC_MASK   0xffffffff
+#define GHCB_MSR_CPUID_VALUE_POS   32
+#define GHCB_MSR_CPUID_VALUE_MASK  0xffffffff
+#define GHCB_MSR_CPUID_REG_POS 30
+#define GHCB_MSR_CPUID_REG_MASK    0x3
+
 extern unsigned int max_sev_asid;
 
 static inline bool svm_sev_enabled(void)
-- 
2.28.0



[RFC PATCH v2 09/33] KVM: SVM: Cannot re-initialize the VMCB after shutdown with SEV-ES

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

When a SHUTDOWN VMEXIT is encountered, normally the VMCB is re-initialized
so that the guest can be re-launched. But when a guest is running as an
SEV-ES guest, the VMSA cannot be re-initialized because it has been
encrypted. For now, just return -EINVAL to prevent a possible attempt at
a guest reset.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/svm.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 51041eb9758a..5fd77229fefc 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2041,6 +2041,13 @@ static int shutdown_interception(struct vcpu_svm *svm)
 {
struct kvm_run *kvm_run = svm->vcpu.run;
 
+   /*
+* The VM save area has already been encrypted so it
+* cannot be reinitialized - just terminate.
+*/
+   if (sev_es_guest(svm->vcpu.kvm))
+   return -EINVAL;
+
/*
 * VMCB is undefined after a SHUTDOWN intercept
 * so reinitialize it.
-- 
2.28.0



[RFC PATCH v2 07/33] KVM: SVM: Prevent debugging under SEV-ES

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

Since the guest register state of an SEV-ES guest is encrypted, debugging
is not supported. Update the code to prevent guest debugging when the
guest has protected state.

Additionally, an SEV-ES guest must only and always intercept DR7 reads and
writes. Update set_dr_intercepts() and clr_dr_intercepts() to account for
this.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/svm.c |  9 +
 arch/x86/kvm/svm/svm.h | 81 +++---
 arch/x86/kvm/x86.c |  3 ++
 3 files changed, 57 insertions(+), 36 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index fb0e8a0881f8..5270735bbdd8 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1817,6 +1817,9 @@ static void svm_set_dr6(struct vcpu_svm *svm, unsigned 
long value)
 {
struct vmcb *vmcb = svm->vmcb;
 
+   if (svm->vcpu.arch.guest_state_protected)
+   return;
+
if (unlikely(value != vmcb->save.dr6)) {
vmcb->save.dr6 = value;
vmcb_mark_dirty(vmcb, VMCB_DR);
@@ -1827,6 +1830,9 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu 
*vcpu)
 {
struct vcpu_svm *svm = to_svm(vcpu);
 
+   if (vcpu->arch.guest_state_protected)
+   return;
+
get_debugreg(vcpu->arch.db[0], 0);
get_debugreg(vcpu->arch.db[1], 1);
get_debugreg(vcpu->arch.db[2], 2);
@@ -1845,6 +1851,9 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned 
long value)
 {
struct vcpu_svm *svm = to_svm(vcpu);
 
+   if (vcpu->arch.guest_state_protected)
+   return;
+
svm->vmcb->save.dr7 = value;
vmcb_mark_dirty(svm->vmcb, VMCB_DR);
 }
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 09e78487e5d0..e6900c62f164 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -196,6 +196,28 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
return container_of(kvm, struct kvm_svm, kvm);
 }
 
+static inline bool sev_guest(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_AMD_SEV
+   struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+   return sev->active;
+#else
+   return false;
+#endif
+}
+
+static inline bool sev_es_guest(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_AMD_SEV
+   struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+   return sev_guest(kvm) && sev->es_active;
+#else
+   return false;
+#endif
+}
+
 static inline void vmcb_mark_all_dirty(struct vmcb *vmcb)
 {
vmcb->control.clean = 0;
@@ -247,21 +269,24 @@ static inline void set_dr_intercepts(struct vcpu_svm *svm)
 {
struct vmcb *vmcb = get_host_vmcb(svm);
 
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
+   if (!sev_es_guest(svm->vcpu.kvm)) {
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
+   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
+   }
+
vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
-   vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRI

[RFC PATCH v2 13/33] KVM: SVM: Add support for SEV-ES GHCB MSR protocol function 0x002

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

The GHCB specification defines a GHCB MSR protocol using the lower
12-bits of the GHCB MSR (in the hypervisor this corresponds to the
GHCB GPA field in the VMCB).

Function 0x002 is a request for SEV information: the hypervisor sets the
GHCB MSR value to the SEV INFO response (the supported GHCB protocol version
range and the encryption bit position), as per the specification, via the
VMCB GHCB GPA field.
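
For illustration (not part of the patch), the response packs the supported
protocol version range and the encryption-bit position; for example, with
min = max = 1 and a C-bit at position 51 (51 is only an example value):

/* Example only: compute the SEV INFO response for versions 1..1 and an
 * assumed C-bit position of 51. */
static u64 sev_info_example(void)
{
	return GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX, GHCB_VERSION_MIN, 51);
	/* = (1 << 48) | (1 << 32) | (51 << 24) | 0x001 = 0x0001000133000001 */
}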

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c | 26 +-
 arch/x86/kvm/svm/svm.h | 17 +
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 500c845f4979..fb0410fd2f68 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -21,6 +21,7 @@
 #include "cpuid.h"
 #include "trace.h"
 
+static u8 sev_enc_bit;
 static int sev_flush_asids(void);
 static DECLARE_RWSEM(sev_deactivate_lock);
 static DEFINE_MUTEX(sev_bitmap_lock);
@@ -1140,6 +1141,9 @@ void __init sev_hardware_setup(void)
/* Retrieve SEV CPUID information */
cpuid(0x801f, &eax, &ebx, &ecx, &edx);
 
+   /* Set encryption bit location for SEV-ES guests */
+   sev_enc_bit = ebx & 0x3f;
+
/* Maximum number of encrypted guests supported simultaneously */
max_sev_asid = ecx;
 
@@ -1408,9 +1412,29 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
 }
 
+static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
+{
+   svm->vmcb->control.ghcb_gpa = value;
+}
+
 static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
 {
-   return -EINVAL;
+   struct vmcb_control_area *control = &svm->vmcb->control;
+   u64 ghcb_info;
+
+   ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;
+
+   switch (ghcb_info) {
+   case GHCB_MSR_SEV_INFO_REQ:
+   set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
+   GHCB_VERSION_MIN,
+   sev_enc_bit));
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   return 1;
 }
 
 int sev_handle_vmgexit(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 67ea93b284a8..487fdc0c986b 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -505,9 +505,26 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
 
 /* sev.c */
 
+#define GHCB_VERSION_MAX   1ULL
+#define GHCB_VERSION_MIN   1ULL
+
 #define GHCB_MSR_INFO_POS  0
 #define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1)
 
+#define GHCB_MSR_SEV_INFO_RESP 0x001
+#define GHCB_MSR_SEV_INFO_REQ  0x002
+#define GHCB_MSR_VER_MAX_POS   48
+#define GHCB_MSR_VER_MAX_MASK  0xffff
+#define GHCB_MSR_VER_MIN_POS   32
+#define GHCB_MSR_VER_MIN_MASK  0xffff
+#define GHCB_MSR_CBIT_POS  24
+#define GHCB_MSR_CBIT_MASK 0xff
+#define GHCB_MSR_SEV_INFO(_max, _min, _cbit)   \
+((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) |   \
+(((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) |   \
+(((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) |\
+GHCB_MSR_SEV_INFO_RESP)
+
 extern unsigned int max_sev_asid;
 
 static inline bool svm_sev_enabled(void)
-- 
2.28.0



[RFC PATCH v2 10/33] KVM: SVM: Prepare for SEV-ES exit handling in the sev.c file

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

This is a pre-patch to consolidate some exit handling code into callable
functions. Follow-on patches for SEV-ES exit handling will then be able
to use them from the sev.c file.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/svm.c | 64 +-
 1 file changed, 38 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 5fd77229fefc..ccae0f63e784 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3156,6 +3156,43 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
   "excp_to:", save->last_excp_to);
 }
 
+static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
+{
+   if (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
+   svm_exit_handlers[exit_code])
+   return 0;
+
+   vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code);
+   dump_vmcb(vcpu);
+   vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+   vcpu->run->internal.suberror = 
KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+   vcpu->run->internal.ndata = 2;
+   vcpu->run->internal.data[0] = exit_code;
+   vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
+
+   return -EINVAL;
+}
+
+static int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code)
+{
+   if (svm_handle_invalid_exit(&svm->vcpu, exit_code))
+   return 0;
+
+#ifdef CONFIG_RETPOLINE
+   if (exit_code == SVM_EXIT_MSR)
+   return msr_interception(svm);
+   else if (exit_code == SVM_EXIT_VINTR)
+   return interrupt_window_interception(svm);
+   else if (exit_code == SVM_EXIT_INTR)
+   return intr_interception(svm);
+   else if (exit_code == SVM_EXIT_HLT)
+   return halt_interception(svm);
+   else if (exit_code == SVM_EXIT_NPF)
+   return npf_interception(svm);
+#endif
+   return svm_exit_handlers[exit_code](svm);
+}
+
 static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
  u32 *intr_info, u32 *error_code)
 {
@@ -3222,32 +3259,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t 
exit_fastpath)
if (exit_fastpath != EXIT_FASTPATH_NONE)
return 1;
 
-   if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
-   || !svm_exit_handlers[exit_code]) {
-   vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", 
exit_code);
-   dump_vmcb(vcpu);
-   vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-   vcpu->run->internal.suberror =
-   KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
-   vcpu->run->internal.ndata = 2;
-   vcpu->run->internal.data[0] = exit_code;
-   vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
-   return 0;
-   }
-
-#ifdef CONFIG_RETPOLINE
-   if (exit_code == SVM_EXIT_MSR)
-   return msr_interception(svm);
-   else if (exit_code == SVM_EXIT_VINTR)
-   return interrupt_window_interception(svm);
-   else if (exit_code == SVM_EXIT_INTR)
-   return intr_interception(svm);
-   else if (exit_code == SVM_EXIT_HLT)
-   return halt_interception(svm);
-   else if (exit_code == SVM_EXIT_NPF)
-   return npf_interception(svm);
-#endif
-   return svm_exit_handlers[exit_code](svm);
+   return svm_invoke_exit_handler(svm, exit_code);
 }
 
 static void reload_tss(struct kvm_vcpu *vcpu)
-- 
2.28.0



[RFC PATCH v2 08/33] KVM: SVM: Do not allow instruction emulation under SEV-ES

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

When a guest is running as an SEV-ES guest, it is not possible to emulate
instructions. Add support to prevent instruction emulation.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/svm.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 5270735bbdd8..51041eb9758a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4201,6 +4201,12 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu 
*vcpu, void *insn, int i
bool smep, smap, is_user;
unsigned long cr4;
 
+   /*
+* When the guest is an SEV-ES guest, emulation is not possible.
+*/
+   if (sev_es_guest(vcpu->kvm))
+   return false;
+
/*
 * Detect and workaround Errata 1096 Fam_17h_00_0Fh.
 *
-- 
2.28.0



[RFC PATCH v2 05/33] KVM: x86: Mark GPRs dirty when written

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

When performing VMGEXIT processing for an SEV-ES guest, register values
will be synced between KVM and the GHCB. Prepare for detecting when a GPR
has been updated (marked dirty) in order to determine whether to sync the
register to the GHCB.
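
As a usage sketch (mirroring the consumer added later in this series,
sev_es_sync_to_ghcb(); nothing here is new API):

/* Sketch: the VMGEXIT sync path copies a GPR back to the GHCB only if it
 * was written through the accessors, i.e. marked dirty. */
static void sync_rax_to_ghcb_sketch(struct kvm_vcpu *vcpu, struct ghcb *ghcb)
{
	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX))
		ghcb_set_rax(ghcb, kvm_rax_read(vcpu));
}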

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/kvm_cache_regs.h | 51 ++-
 1 file changed, 26 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index cfe83d4ae625..9ab7974857cd 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -9,6 +9,31 @@
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR  \
 | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE | X86_CR4_TSD)
 
+static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
+enum kvm_reg reg)
+{
+   return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+}
+
+static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
+enum kvm_reg reg)
+{
+   return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
+static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
+  enum kvm_reg reg)
+{
+   __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+}
+
+static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
+  enum kvm_reg reg)
+{
+   __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+   __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
 #define BUILD_KVM_GPR_ACCESSORS(lname, uname)\
 static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
 {\
@@ -18,6 +43,7 @@ static __always_inline void kvm_##lname##_write(struct 
kvm_vcpu *vcpu,  \
unsigned long val)\
 {\
vcpu->arch.regs[VCPU_REGS_##uname] = val; \
+   kvm_register_mark_dirty(vcpu, VCPU_REGS_##uname); \
 }
 BUILD_KVM_GPR_ACCESSORS(rax, RAX)
 BUILD_KVM_GPR_ACCESSORS(rbx, RBX)
@@ -37,31 +63,6 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14)
 BUILD_KVM_GPR_ACCESSORS(r15, R15)
 #endif
 
-static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
-enum kvm_reg reg)
-{
-   return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
-}
-
-static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
-enum kvm_reg reg)
-{
-   return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
-}
-
-static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
-  enum kvm_reg reg)
-{
-   __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
-}
-
-static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
-  enum kvm_reg reg)
-{
-   __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
-   __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
-}
-
 static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg)
 {
if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
-- 
2.28.0



[RFC PATCH v2 06/33] KVM: SVM: Add required changes to support intercepts under SEV-ES

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

When a guest is running under SEV-ES, the hypervisor cannot access the
guest register state. There are numerous places in the KVM code where
registers that cannot be accessed for such a guest (e.g. RIP, CR0, etc.)
are read or written. Add checks to prevent these register accesses and
add intercept update support at various points within the KVM code.

Also, when handling a VMGEXIT, exceptions are passed back through the
GHCB. Since the RDMSR/WRMSR intercepts (may) inject a #GP on error,
update the SVM intercepts to handle this for SEV-ES guests.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/svm.h |   3 +-
 arch/x86/kvm/svm/svm.c | 111 +
 arch/x86/kvm/x86.c |   6 +-
 3 files changed, 107 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1edf24f51b53..bce28482d63d 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -178,7 +178,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define LBR_CTL_ENABLE_MASK BIT_ULL(0)
 #define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
 
-#define SVM_INTERRUPT_SHADOW_MASK 1
+#define SVM_INTERRUPT_SHADOW_MASK  BIT_ULL(0)
+#define SVM_GUEST_INTERRUPT_MASK   BIT_ULL(1)
 
 #define SVM_IOIO_STR_SHIFT 2
 #define SVM_IOIO_REP_SHIFT 3
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 5bbdbaefcd9e..fb0e8a0881f8 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include "trace.h"
@@ -320,6 +321,13 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
struct vcpu_svm *svm = to_svm(vcpu);
 
+   /*
+* SEV-ES does not expose the next RIP. The RIP update is controlled by
+* the type of exit and the #VC handler in the guest.
+*/
+   if (sev_es_guest(vcpu->kvm))
+   goto done;
+
if (nrips && svm->vmcb->control.next_rip != 0) {
WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
svm->next_rip = svm->vmcb->control.next_rip;
@@ -331,6 +339,8 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
} else {
kvm_rip_write(vcpu, svm->next_rip);
}
+
+done:
svm_set_interrupt_shadow(vcpu, 0);
 
return 1;
@@ -1666,9 +1676,18 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct 
desc_ptr *dt)
 
 static void update_cr0_intercept(struct vcpu_svm *svm)
 {
-   ulong gcr0 = svm->vcpu.arch.cr0;
-   u64 *hcr0 = &svm->vmcb->save.cr0;
+   ulong gcr0;
+   u64 *hcr0;
+
+   /*
+* SEV-ES guests must always keep the CR intercepts cleared. CR
+* tracking is done using the CR write traps.
+*/
+   if (sev_es_guest(svm->vcpu.kvm))
+   return;
 
+   gcr0 = svm->vcpu.arch.cr0;
+   hcr0 = &svm->vmcb->save.cr0;
*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
| (gcr0 & SVM_CR0_SELECTIVE_MASK);
 
@@ -1688,7 +1707,7 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
struct vcpu_svm *svm = to_svm(vcpu);
 
 #ifdef CONFIG_X86_64
-   if (vcpu->arch.efer & EFER_LME) {
+   if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) {
if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
vcpu->arch.efer |= EFER_LMA;
svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
@@ -2613,7 +2632,29 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
 
 static int rdmsr_interception(struct vcpu_svm *svm)
 {
-   return kvm_emulate_rdmsr(&svm->vcpu);
+   u32 ecx;
+   u64 data;
+
+   if (!sev_es_guest(svm->vcpu.kvm))
+   return kvm_emulate_rdmsr(&svm->vcpu);
+
+   ecx = kvm_rcx_read(&svm->vcpu);
+   if (kvm_get_msr(&svm->vcpu, ecx, &data)) {
+   trace_kvm_msr_read_ex(ecx);
+   ghcb_set_sw_exit_info_1(svm->ghcb, 1);
+   ghcb_set_sw_exit_info_2(svm->ghcb,
+   X86_TRAP_GP |
+   SVM_EVTINJ_TYPE_EXEPT |
+   SVM_EVTINJ_VALID);
+   return 1;
+   }
+
+   trace_kvm_msr_read(ecx, data);
+
+   kvm_rax_write(&svm->vcpu, data & -1u);
+   kvm_rdx_write(&svm->vcpu, (data >> 32) & -1u);
+
+   return kvm_skip_emulated_instruction(&svm->vcpu);
 }
 
 static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
@@ -2802,7 +2843,27 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr)
 
 static int wrmsr_interception(struct vcpu_svm *svm)
 {
-   return kvm_emulate_wrmsr(&svm->vcpu);
+   u32 ecx;
+   u64 data;
+
+
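
[ The wrmsr_interception() hunk is truncated at this point in the archive.
  As a rough sketch only -- assuming the write side mirrors the rdmsr path
  above, which is how the rest of this patch is structured -- the SEV-ES
  handling would look something like the following. This is a
  reconstruction for illustration, not the literal patch text. ]

static int wrmsr_interception(struct vcpu_svm *svm)
{
	u32 ecx;
	u64 data;

	if (!sev_es_guest(svm->vcpu.kvm))
		return kvm_emulate_wrmsr(&svm->vcpu);

	/* Reassemble the 64-bit value the guest placed in EDX:EAX */
	ecx  = kvm_rcx_read(&svm->vcpu);
	data = kvm_read_edx_eax(&svm->vcpu);

	if (kvm_set_msr(&svm->vcpu, ecx, data)) {
		trace_kvm_msr_write_ex(ecx, data);

		/* Request that the guest take a #GP via the GHCB exit info */
		ghcb_set_sw_exit_info_1(svm->ghcb, 1);
		ghcb_set_sw_exit_info_2(svm->ghcb,
					X86_TRAP_GP |
					SVM_EVTINJ_TYPE_EXEPT |
					SVM_EVTINJ_VALID);
		return 1;
	}

	trace_kvm_msr_write(ecx, data);

	return kvm_skip_emulated_instruction(&svm->vcpu);
}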

[RFC PATCH v2 04/33] KVM: SVM: Add support for the SEV-ES VMSA

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

Allocate a page during vCPU creation to be used as the encrypted VM save
area (VMSA) for the SEV-ES guest. Provide a flag in the kvm_vcpu_arch
structure that indicates whether the guest state is protected.

When freeing a VMSA page that has been encrypted, the cache contents must
be flushed using the MSR_AMD64_VM_PAGE_FLUSH before freeing the page.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/kvm_host.h  |  3 +++
 arch/x86/include/asm/msr-index.h |  1 +
 arch/x86/kvm/svm/svm.c   | 42 ++--
 arch/x86/kvm/svm/svm.h   |  4 +++
 4 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d0f77235da92..355fef2cd4e2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -789,6 +789,9 @@ struct kvm_vcpu_arch {
 
/* AMD MSRC001_0015 Hardware Configuration */
u64 msr_hwcr;
+
+   /* Protected Guests */
+   bool guest_state_protected;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 249a4147c4b2..16f5b20bb099 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -466,6 +466,7 @@
 #define MSR_AMD64_IBSBRTARGET  0xc001103b
 #define MSR_AMD64_IBSOPDATA4   0xc001103d
 #define MSR_AMD64_IBS_REG_COUNT_MAX8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_VM_PAGE_FLUSH0xc001011e
 #define MSR_AMD64_SEV_ES_GHCB  0xc0010130
 #define MSR_AMD64_SEV  0xc0010131
 #define MSR_AMD64_SEV_ENABLED_BIT  0
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6c47e1655db3..5bbdbaefcd9e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1268,6 +1268,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm;
struct page *vmcb_page;
struct page *hsave_page;
+   struct page *vmsa_page = NULL;
int err;
 
BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
@@ -1282,9 +1283,19 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
if (!hsave_page)
goto error_free_vmcb_page;
 
+   if (sev_es_guest(svm->vcpu.kvm)) {
+   /*
+* SEV-ES guests require a separate VMSA page used to contain
+* the encrypted register state of the guest.
+*/
+   vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+   if (!vmsa_page)
+   goto error_free_hsave_page;
+   }
+
err = avic_init_vcpu(svm);
if (err)
-   goto error_free_hsave_page;
+   goto error_free_vmsa_page;
 
/* We initialize this flag to true to make sure that the is_running
 * bit would be set the first time the vcpu is loaded.
@@ -1296,7 +1307,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 
svm->msrpm = svm_vcpu_alloc_msrpm();
if (!svm->msrpm)
-   goto error_free_hsave_page;
+   goto error_free_vmsa_page;
 
svm_vcpu_init_msrpm(vcpu, svm->msrpm);
 
@@ -1309,6 +1320,10 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 
svm->vmcb = page_address(vmcb_page);
svm->vmcb_pa = __sme_set(page_to_pfn(vmcb_page) << PAGE_SHIFT);
+
+   if (vmsa_page)
+   svm->vmsa = page_address(vmsa_page);
+
svm->asid_generation = 0;
init_vmcb(svm);
 
@@ -1319,6 +1334,9 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 
 error_free_msrpm:
svm_vcpu_free_msrpm(svm->msrpm);
+error_free_vmsa_page:
+   if (vmsa_page)
+   __free_page(vmsa_page);
 error_free_hsave_page:
__free_page(hsave_page);
 error_free_vmcb_page:
@@ -1346,6 +1364,26 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 */
svm_clear_current_vmcb(svm->vmcb);
 
+   if (sev_es_guest(vcpu->kvm)) {
+   struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
+
+   if (vcpu->arch.guest_state_protected) {
+   u64 page_to_flush;
+
+   /*
+* The VMSA page was used by hardware to hold guest
+* encrypted state, be sure to flush it before returning
+* it to the system. This is done using the VM Page
+* Flush MSR (which takes the page virtual address and
+* guest ASID).
+*/
+   page_to_flush = (u64)svm->vmsa | sev->asid;
+   wrmsrl(MSR_AMD64_VM_PAGE_FLUSH, page_to_flush);
+   }
+
+   __free_page(virt_to_page(svm->vmsa));
+   }
+
__free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
__free_p

[RFC PATCH v2 02/33] KVM: SVM: Add support for SEV-ES capability in KVM

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

Add support to KVM for determining if a system is capable of supporting
SEV-ES as well as determining if a guest is an SEV-ES guest.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/Kconfig   |  3 ++-
 arch/x86/kvm/svm/sev.c | 47 ++
 arch/x86/kvm/svm/svm.c | 20 +-
 arch/x86/kvm/svm/svm.h | 17 ++-
 4 files changed, 66 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index fbd5bd7a945a..4e8924aab05e 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -99,7 +99,8 @@ config KVM_AMD_SEV
depends on KVM_AMD && X86_64
depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
help
-   Provides support for launching Encrypted VMs on AMD processors.
+ Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
+ with Encrypted State (SEV-ES) on AMD processors.
 
 config KVM_MMU_AUDIT
bool "Audit KVM MMU"
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 2febbf916af2..9af8369450b2 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -931,7 +931,7 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
struct kvm_sev_cmd sev_cmd;
int r;
 
-   if (!svm_sev_enabled())
+   if (!svm_sev_enabled() || !sev)
return -ENOTTY;
 
if (!argp)
@@ -1124,29 +1124,58 @@ void sev_vm_destroy(struct kvm *kvm)
sev_asid_free(sev->asid);
 }
 
-int __init sev_hardware_setup(void)
+void __init sev_hardware_setup(void)
 {
+   unsigned int eax, ebx, ecx, edx;
+   bool sev_es_supported = false;
+   bool sev_supported = false;
+
+   /* Does the CPU support SEV? */
+   if (!boot_cpu_has(X86_FEATURE_SEV))
+   goto out;
+
+   /* Retrieve SEV CPUID information */
+   cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
+
/* Maximum number of encrypted guests supported simultaneously */
-   max_sev_asid = cpuid_ecx(0x8000001F);
+   max_sev_asid = ecx;
 
if (!svm_sev_enabled())
-   return 1;
+   goto out;
 
/* Minimum ASID value that should be used for SEV guest */
-   min_sev_asid = cpuid_edx(0x8000001F);
+   min_sev_asid = edx;
 
/* Initialize SEV ASID bitmaps */
sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
if (!sev_asid_bitmap)
-   return 1;
+   goto out;
 
sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
if (!sev_reclaim_asid_bitmap)
-   return 1;
+   goto out;
 
-   pr_info("SEV supported\n");
+   pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1);
+   sev_supported = true;
 
-   return 0;
+   /* SEV-ES support requested? */
+   if (!sev_es)
+   goto out;
+
+   /* Does the CPU support SEV-ES? */
+   if (!boot_cpu_has(X86_FEATURE_SEV_ES))
+   goto out;
+
+   /* Has the system been allocated ASIDs for SEV-ES? */
+   if (min_sev_asid == 1)
+   goto out;
+
+   pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1);
+   sev_es_supported = true;
+
+out:
+   sev = sev_supported;
+   sev_es = sev_es_supported;
 }
 
 void sev_hardware_teardown(void)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 4f401fc6a05d..6c47e1655db3 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -187,9 +187,13 @@ static int vgif = true;
 module_param(vgif, int, 0444);
 
 /* enable/disable SEV support */
-static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
 module_param(sev, int, 0444);
 
+/* enable/disable SEV-ES support */
+int sev_es = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+module_param(sev_es, int, 0444);
+
 static bool __read_mostly dump_invalid_vmcb = 0;
 module_param(dump_invalid_vmcb, bool, 0644);
 
@@ -938,15 +942,11 @@ static __init int svm_hardware_setup(void)
kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
}
 
-   if (sev) {
-   if (boot_cpu_has(X86_FEATURE_SEV) &&
-   IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
-   r = sev_hardware_setup();
-   if (r)
-   sev = false;
-   } else {
-   sev = false;
-   }
+   if (IS_ENABLED(CONFIG_KVM_AMD_SEV) && sev) {
+   sev_hardware_setup();
+   } else {
+   sev = false;
+   sev_es = false;
}
 
svm_adjust_mmio_mask();
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index a7f997459b87..84a8e48e698a 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm
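
[ The svm.h hunk is cut off above. For reference, the sev_es_guest()
  helper that the later patches rely on presumably follows the existing
  sev_guest() pattern, roughly as sketched below; the es_active field
  name is an assumption for illustration. ]

static inline bool sev_es_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	/* An SEV-ES guest is an SEV guest with encrypted state active */
	return sev->active && sev->es_active;
#else
	return false;
#endif
}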

[RFC PATCH v2 03/33] KVM: SVM: Add GHCB accessor functions for retrieving fields

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

Update the GHCB accessor functions to add functions for retrieving GHCB
fields by name. Update existing code to use the new accessor functions.

Signed-off-by: Tom Lendacky 
---
 arch/x86/include/asm/svm.h   | 10 ++
 arch/x86/kernel/cpu/vmware.c | 12 ++--
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 71d630bb5e08..1edf24f51b53 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -379,6 +379,16 @@ struct vmcb {
		       (unsigned long *)&ghcb->save.valid_bitmap);		\
	}									\
										\
+	static inline u64 ghcb_get_##field(struct ghcb *ghcb)			\
+	{									\
+		return ghcb->save.field;					\
+	}									\
+										\
+	static inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb)	\
+	{									\
+		return ghcb_##field##_is_valid(ghcb) ? ghcb->save.field : 0;	\
+	}									\
+										\
	static inline void ghcb_set_##field(struct ghcb *ghcb, u64 value)	\
	{									\
		__set_bit(GHCB_BITMAP_IDX(field),				\
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 924571fe5864..c6ede3b3d302 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -501,12 +501,12 @@ static bool vmware_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
  ghcb_rbp_is_valid(ghcb)))
return false;
 
-   regs->bx = ghcb->save.rbx;
-   regs->cx = ghcb->save.rcx;
-   regs->dx = ghcb->save.rdx;
-   regs->si = ghcb->save.rsi;
-   regs->di = ghcb->save.rdi;
-   regs->bp = ghcb->save.rbp;
+   regs->bx = ghcb_get_rbx(ghcb);
+   regs->cx = ghcb_get_rcx(ghcb);
+   regs->dx = ghcb_get_rdx(ghcb);
+   regs->si = ghcb_get_rsi(ghcb);
+   regs->di = ghcb_get_rdi(ghcb);
+   regs->bp = ghcb_get_rbp(ghcb);
 
return true;
 }
-- 
2.28.0
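
[ For illustration only (not part of the patch): for a field such as
  rbx, the accessors added by this macro expand to roughly the
  following. ]

static inline u64 ghcb_get_rbx(struct ghcb *ghcb)
{
	return ghcb->save.rbx;
}

static inline u64 ghcb_get_rbx_if_valid(struct ghcb *ghcb)
{
	/* Return the value only if the guest marked rbx valid in the bitmap */
	return ghcb_rbx_is_valid(ghcb) ? ghcb->save.rbx : 0;
}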



[RFC PATCH v2 01/33] KVM: SVM: Remove the call to sev_platform_status() during setup

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

When both KVM support and the CCP driver are built into the kernel instead
of as modules, KVM initialization happens before CCP initialization. As a
result, sev_platform_status() will return a failure when it is called from
sev_hardware_setup(), even though this isn't really an error condition.

Since sev_platform_status() doesn't need to be called at this time anyway,
remove the invocation from sev_hardware_setup().

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/sev.c | 22 +-
 1 file changed, 1 insertion(+), 21 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 65e15c22bd3c..2febbf916af2 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1126,9 +1126,6 @@ void sev_vm_destroy(struct kvm *kvm)
 
 int __init sev_hardware_setup(void)
 {
-   struct sev_user_data_status *status;
-   int rc;
-
/* Maximum number of encrypted guests supported simultaneously */
max_sev_asid = cpuid_ecx(0x8000001F);
 
@@ -1147,26 +1144,9 @@ int __init sev_hardware_setup(void)
if (!sev_reclaim_asid_bitmap)
return 1;
 
-   status = kmalloc(sizeof(*status), GFP_KERNEL);
-   if (!status)
-   return 1;
-
-   /*
-* Check SEV platform status.
-*
-* PLATFORM_STATUS can be called in any state, if we failed to query
-* the PLATFORM status then either PSP firmware does not support SEV
-* feature or SEV firmware is dead.
-*/
-   rc = sev_platform_status(status, NULL);
-   if (rc)
-   goto err;
-
pr_info("SEV supported\n");
 
-err:
-   kfree(status);
-   return rc;
+   return 0;
 }
 
 void sev_hardware_teardown(void)
-- 
2.28.0



[RFC PATCH v2 00/33] SEV-ES hypervisor support

2020-10-02 Thread Tom Lendacky
From: Tom Lendacky 

This patch series provides support for running SEV-ES guests under KVM.

Secure Encrypted Virtualization - Encrypted State (SEV-ES) expands on the
SEV support to protect the guest register state from the hypervisor. See
"AMD64 Architecture Programmer's Manual Volume 2: System Programming",
section "15.35 Encrypted State (SEV-ES)" [1].

In order to allow a hypervisor to perform functions on behalf of a guest,
there is architectural support for notifying a guest's operating system
when certain types of VMEXITs are about to occur. This allows the guest to
selectively share information with the hypervisor to satisfy the requested
function. The notification is performed using a new exception, the VMM
Communication exception (#VC). The information is shared through the
Guest-Hypervisor Communication Block (GHCB) using the VMGEXIT instruction.
The GHCB format and the protocol for using it is documented in "SEV-ES
Guest-Hypervisor Communication Block Standardization" [2].
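
[ As a rough illustration of this flow (simplified, and not code from
  this series): a guest #VC handler for a CPUID intercept shares only
  the registers CPUID needs, issues VMGEXIT, and reads the results back
  from the GHCB. The GHCB physical address is assumed to already be
  programmed into the GHCB MSR. ]

static void example_vc_handle_cpuid(struct ghcb *ghcb, struct pt_regs *regs)
{
	/* Expose only the registers the hypervisor needs for CPUID */
	ghcb_set_rax(ghcb, regs->ax);
	ghcb_set_rcx(ghcb, regs->cx);

	ghcb_set_sw_exit_code(ghcb, SVM_EXIT_CPUID);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	/* VMGEXIT (rep; vmmcall) hands control to the hypervisor */
	asm volatile("rep; vmmcall" ::: "memory");

	/* The hypervisor returns the CPUID results through the GHCB */
	regs->ax = ghcb_get_rax_if_valid(ghcb);
	regs->bx = ghcb_get_rbx_if_valid(ghcb);
	regs->cx = ghcb_get_rcx_if_valid(ghcb);
	regs->dx = ghcb_get_rdx_if_valid(ghcb);
}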

Under SEV-ES, a vCPU save area (VMSA) must be encrypted. SVM is updated to
build the initial VMSA and then encrypt it before running the guest. Once
encrypted, it must not be modified by the hypervisor. Modification of the
VMSA will result in the VMRUN instruction failing with a SHUTDOWN exit
code. KVM must support the VMGEXIT exit code in order to perform the
necessary functions required of the guest. The GHCB is used to exchange
the information needed by both the hypervisor and the guest.

To simplify access to the VMSA and the GHCB, SVM uses an accessor function
to obtain the address of either the VMSA or the GHCB, depending on the
stage of execution of the guest.

There are changes to some of the intercepts that are needed under SEV-ES.
For example, CR0 writes cannot be intercepted, so the code needs to ensure
that the intercept is not enabled during execution or that the hypervisor
does not try to read the register as part of exit processing. Another
example is shutdown processing, where the vCPU cannot be directly reset.

Support is added to handle VMGEXIT events and implement the GHCB protocol.
This ranges from standard exit events, like a CPUID instruction
intercept, to new support for things like AP processor booting. Much of
the existing SVM intercept support can be re-used by setting the exit
code information from the VMGEXIT and calling the appropriate intercept
handlers.

Finally, to launch and run an SEV-ES guest requires changes to the vCPU
initialization, loading and execution.

[1] https://www.amd.com/system/files/TechDocs/24593.pdf
[2] https://developer.amd.com/wp-content/resources/56421.pdf

---

These patches are based on a commit of the KVM next branch. However, I had
to backport recent SEV-ES guest patches (a previous series to the actual
patches that are now in the tip tree) into my development branch, since
there are prereq patches needed by this series. As a result, this patch
series will not successfully build or apply to the KVM next branch as is.

A version of the tree can be found at:
https://github.com/AMDESE/linux/tree/sev-es-5.9-v1

Changes from v1:
- Removed the VMSA indirection support:
  - On LAUNCH_UPDATE_VMSA, sync traditional VMSA over to the new SEV-ES
VMSA area to be encrypted.
  - On VMGEXIT VMEXIT, directly copy valid registers into vCPU arch
register array from GHCB. On VMRUN (following a VMGEXIT), directly
copy dirty vCPU arch registers to GHCB.
  - Removed reg_read_override()/reg_write_override() KVM ops.
- Added VMGEXIT exit-reason validation.
- Changed kvm_vcpu_arch variable vmsa_encrypted to guest_state_protected
- Updated the tracking support for EFER/CR0/CR4/CR8 to minimize changes
  to the x86.c code
- Updated __set_sregs to not set any register values (previously supported
  setting the tracked values of EFER/CR0/CR4/CR8)
- Added support for reporting SMM capability at the VM-level. This allows
  an SEV-ES guest to indicate SMM is not supported
- Updated FPU support to check for a guest FPU save area before using it.
  Updated SVM to free guest FPU for an SEV-ES guest during KVM create_vcpu
  op.
- Removed changes to the kvm_skip_emulated_instruction()
- Added VMSA validity checks before invoking LAUNCH_UPDATE_VMSA
- Minor code restructuring in areas for better readability

Cc: Paolo Bonzini 
Cc: Jim Mattson 
Cc: Joerg Roedel 
Cc: Sean Christopherson 
Cc: Vitaly Kuznetsov 
Cc: Wanpeng Li 
Cc: Borislav Petkov 
Cc: Ingo Molnar 
Cc: Thomas Gleixner 
Cc: Brijesh Singh 

Tom Lendacky (33):
  KVM: SVM: Remove the call to sev_platform_status() during setup
  KVM: SVM: Add support for SEV-ES capability in KVM
  KVM: SVM: Add GHCB accessor functions for retrieving fields
  KVM: SVM: Add support for the SEV-ES VMSA
  KVM: x86: Mark GPRs dirty when written
  KVM: SVM: Add required changes to support intercepts under SEV-ES
  KVM: SVM: Prevent debugging under SEV-ES
  KVM: SVM: Do not allow instruction emulation under

Re: [RFC Patch 0/2] KVM: SVM: Cgroup support for SVM SEV ASIDs

2020-10-01 Thread Tom Lendacky

On 10/1/20 1:08 PM, Peter Gonda wrote:

On Thu, Sep 24, 2020 at 1:55 PM Tom Lendacky  wrote:


On 9/24/20 2:21 PM, Sean Christopherson wrote:

On Tue, Sep 22, 2020 at 02:14:04PM -0700, Vipin Sharma wrote:

On Mon, Sep 21, 2020 at 06:48:38PM -0700, Sean Christopherson wrote:

On Mon, Sep 21, 2020 at 05:40:22PM -0700, Vipin Sharma wrote:

Hello,

This patch series adds a new SEV controller for tracking and limiting
the usage of SEV ASIDs on the AMD SVM platform.

SEV ASIDs are used in creating encrypted VM and lightweight sandboxes
but this resource is in very limited quantity on a host.

This limited quantity creates issues like SEV ASID starvation and
unoptimized scheduling in the cloud infrastructure.

SEV controller provides SEV ASID tracking and resource control
mechanisms.


This should be genericized to not be SEV specific.  TDX has a similar
scarcity issue in the form of key IDs, which IIUC are analogous to SEV ASIDs
(gave myself a quick crash course on SEV ASIDs).  Functionally, I doubt it
would change anything, I think it'd just be a bunch of renaming.  The hardest
part would probably be figuring out a name :-).

Another idea would be to go even more generic and implement a KVM cgroup
that accounts the number of VMs of a particular type, e.g. legacy, SEV,
SEV-ES?, and TDX.  That has potential future problems though as it falls
apart if hardware ever supports 1:MANY VMs:KEYS, or if there is a need to
account keys outside of KVM, e.g. if MKTME for non-KVM cases ever sees the
light of day.


I read about the TDX and its use of the KeyID for encrypting VMs. TDX
has two kinds of KeyIDs private and shared.


To clarify, "shared" KeyIDs are simply legacy MKTME KeyIDs.  This is relevant
because those KeyIDs can be used without TDX or KVM in the picture.


On AMD platform there are two types of ASIDs for encryption.
1. SEV ASID - Normal runtime guest memory encryption.
2. SEV-ES ASID - Extends SEV ASID by adding register state encryption with
   integrity.

Both types of ASIDs have their own maximum value which is provisioned in
the firmware


Ugh, I missed that detail in the SEV-ES RFC.  Does SNP add another ASID type,
or does it reuse SEV-ES ASIDs?  If it does add another type, is that trend
expected to continue, i.e. will SEV end up with SEV, SEV-ES, SEV-ES-SNP,
SEV-ES-SNP-X, SEV-ES-SNP-X-Y, etc...?


SEV-SNP and SEV-ES share the same ASID range.


Where is this documented? From the SEV-SNP FW ABI Spec 0.8 "The
firmware checks that ASID is an encryption capable ASID. If not, the
firmware returns INVALID_ASID." that doesn't seem clear that an SEV-ES
ASID is required. Should this document be more clear?


I let the owner of the spec know and it will be updated.

Thanks,
Tom





[tip: x86/seves] x86/sev-es: Use GHCB accessor for setting the MMIO scratch buffer

2020-09-25 Thread tip-bot2 for Tom Lendacky
The following commit has been merged into the x86/seves branch of tip:

Commit-ID: 0ddfb1cf3b6b07c97cff16ea69931d986f9622ee
Gitweb:
https://git.kernel.org/tip/0ddfb1cf3b6b07c97cff16ea69931d986f9622ee
Author:Tom Lendacky 
AuthorDate:Fri, 25 Sep 2020 08:38:26 -05:00
Committer: Borislav Petkov 
CommitterDate: Fri, 25 Sep 2020 17:12:41 +02:00

x86/sev-es: Use GHCB accessor for setting the MMIO scratch buffer

Use ghcb_set_sw_scratch() to set the GHCB scratch field, which will also
set the corresponding bit in the GHCB valid_bitmap field to denote that
sw_scratch is actually valid.

Signed-off-by: Tom Lendacky 
Signed-off-by: Borislav Petkov 
Reviewed-by: Joerg Roedel 
Link: 
https://lkml.kernel.org/r/ba84deabdf44a7a880454fb351d189c6ad79d4ba.1601041106.git.thomas.lenda...@amd.com
---
 arch/x86/kernel/sev-es.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index 6fcfdd3..4a96726 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -751,7 +751,7 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
/* Can never be greater than 8 */
exit_info_2 = bytes;
 
-   ghcb->save.sw_scratch = ghcb_pa + offsetof(struct ghcb, shared_buffer);
+   ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));
 
return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
 }
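
[ For reference (illustrative expansion, not part of this commit): the
  generated setter stores the value and also marks it valid in the
  bitmap, which is what the open-coded assignment above was missing. ]

static inline void ghcb_set_sw_scratch(struct ghcb *ghcb, u64 value)
{
	__set_bit(GHCB_BITMAP_IDX(sw_scratch),
		  (unsigned long *)&ghcb->save.valid_bitmap);

	ghcb->save.sw_scratch = value;
}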


[PATCH] x86/sev-es: Use GHCB accessor for setting the MMIO scratch buffer

2020-09-25 Thread Tom Lendacky
From: Tom Lendacky 

Use ghcb_set_sw_scratch() to set the GHCB scratch field, which will also
set the corresponding bit in the GHCB valid_bitmap field.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kernel/sev-es.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index 6fcfdd32769f..4a96726fbaf8 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -751,7 +751,7 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
/* Can never be greater than 8 */
exit_info_2 = bytes;
 
-   ghcb->save.sw_scratch = ghcb_pa + offsetof(struct ghcb, shared_buffer);
+   ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));
 
return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
 }
-- 
2.28.0



Re: [RFC Patch 0/2] KVM: SVM: Cgroup support for SVM SEV ASIDs

2020-09-24 Thread Tom Lendacky

On 9/24/20 2:21 PM, Sean Christopherson wrote:

On Tue, Sep 22, 2020 at 02:14:04PM -0700, Vipin Sharma wrote:

On Mon, Sep 21, 2020 at 06:48:38PM -0700, Sean Christopherson wrote:

On Mon, Sep 21, 2020 at 05:40:22PM -0700, Vipin Sharma wrote:

Hello,

This patch series adds a new SEV controller for tracking and limiting
the usage of SEV ASIDs on the AMD SVM platform.

SEV ASIDs are used in creating encrypted VM and lightweight sandboxes
but this resource is in very limited quantity on a host.

This limited quantity creates issues like SEV ASID starvation and
unoptimized scheduling in the cloud infrastructure.

SEV controller provides SEV ASID tracking and resource control
mechanisms.


This should be genericized to not be SEV specific.  TDX has a similar
scarcity issue in the form of key IDs, which IIUC are analogous to SEV ASIDs
(gave myself a quick crash course on SEV ASIDs).  Functionally, I doubt it
would change anything, I think it'd just be a bunch of renaming.  The hardest
part would probably be figuring out a name :-).

Another idea would be to go even more generic and implement a KVM cgroup
that accounts the number of VMs of a particular type, e.g. legacy, SEV,
SEV-ES?, and TDX.  That has potential future problems though as it falls
apart if hardware ever supports 1:MANY VMs:KEYS, or if there is a need to
account keys outside of KVM, e.g. if MKTME for non-KVM cases ever sees the
light of day.


I read about the TDX and its use of the KeyID for encrypting VMs. TDX
has two kinds of KeyIDs private and shared.


To clarify, "shared" KeyIDs are simply legacy MKTME KeyIDs.  This is relevant
because those KeyIDs can be used without TDX or KVM in the picture.


On AMD platform there are two types of ASIDs for encryption.
1. SEV ASID - Normal runtime guest memory encryption.
2. SEV-ES ASID - Extends SEV ASID by adding register state encryption with
 integrity.

Both types of ASIDs have their own maximum value which is provisioned in
the firmware


Ugh, I missed that detail in the SEV-ES RFC.  Does SNP add another ASID type,
or does it reuse SEV-ES ASIDs?  If it does add another type, is that trend
expected to continue, i.e. will SEV end up with SEV, SEV-ES, SEV-ES-SNP,
SEV-ES-SNP-X, SEV-ES-SNP-X-Y, etc...?


SEV-SNP and SEV-ES share the same ASID range.

Thanks,
Tom




So, we are talking about 4 different types of resources:
1. AMD SEV ASID (implemented in this patch as sev.* files in SEV cgroup)
2. AMD SEV-ES ASID (in future, adding files like sev_es.*)
3. Intel TDX private KeyID
4. Intel TDX shared KeyID

TDX private KeyID is similar to SEV and SEV-ES ASID. I think coming up
with the same name which can be used by both platforms will not be easy,
and extensible with the future enhancements. This will get even more
difficult if Arm also comes up with something similar but with different
nuances.


Honest question, what's easier for userspace/orchestration layers?  Having an
abstract but common name, or concrete but different names?  My gut reaction is
to provide a common interface, but I can see how that could do more harm than
good, e.g. some amount of hardware capability discovery is possible with
concrete names.  And I'm guessing there's already a fair amount of vendor
specific knowledge bleeding into userspace for these features...

And if SNP is adding another ASID namespace, trying to abstract the types is
probably a lost cause.

 From a code perspective, I doubt it will matter all that much, e.g. it should
be easy enough to provide helpers for exposing a new asid/key type.


I like the idea of the KVM cgroup and when it is mounted it will have
different files based on the hardware platform.


I don't think a KVM cgroup is the correct approach, e.g. there are potential
use cases for "legacy" MKTME without KVM.  Maybe something like Encryption
Keys cgroup?


1. KVM cgroup on AMD will have:
sev.max & sev.current.
sev_es.max & sev_es.current.

2. KVM cgroup mounted on Intel:
tdx_private_keys.max
tdx_shared_keys.max

The KVM cgroup can be used to have control files which are generic (no
use case in my mind right now) and hardware platform specific files
also.


My "generic KVM cgroup" suggestion was probably a pretty bad suggestion.
Except for ASIDs/KeyIDs, KVM itself doesn't manage any constrained resources,
e.g. memory, logical CPUs, time slices, etc... are all generic resources that
are consumed by KVM but managed elsewhere.  We definitely don't want to change
that, nor do I think we want to do anything, such as creating a KVM cgroup,
that would imply that having KVM manage resources is a good idea.



[PATCH v2 1/2] KVM: SVM: Add a dedicated INVD intercept routine

2020-09-24 Thread Tom Lendacky
From: Tom Lendacky 

The INVD instruction intercept performs emulation. Emulation can't be done
on an SEV guest because the guest memory is encrypted.

Provide a dedicated intercept routine for the INVD intercept. Since the
instruction is emulated as a NOP anyway, just skip it instead.

Fixes: 1654efcbc431 ("KVM: SVM: Add KVM_SEV_INIT command")
Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/svm.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c91acabf18d0..66d225899781 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2183,6 +2183,12 @@ static int iret_interception(struct vcpu_svm *svm)
return 1;
 }
 
+static int invd_interception(struct vcpu_svm *svm)
+{
+   /* Treat an INVD instruction as a NOP and just skip it. */
+   return kvm_skip_emulated_instruction(&svm->vcpu);
+}
+
 static int invlpg_interception(struct vcpu_svm *svm)
 {
if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
@@ -2774,7 +2780,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_RDPMC]= rdpmc_interception,
[SVM_EXIT_CPUID]= cpuid_interception,
[SVM_EXIT_IRET] = iret_interception,
-   [SVM_EXIT_INVD] = emulate_on_interception,
+   [SVM_EXIT_INVD] = invd_interception,
[SVM_EXIT_PAUSE]= pause_interception,
[SVM_EXIT_HLT]  = halt_interception,
[SVM_EXIT_INVLPG]   = invlpg_interception,
-- 
2.28.0



[PATCH v2 2/2] KVM: VMX: Do not perform emulation for INVD intercept

2020-09-24 Thread Tom Lendacky
From: Tom Lendacky 

The INVD instruction is emulated as a NOP, so just skip the instruction
instead.

Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/vmx/vmx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 8646a797b7a8..f8075d3acf9c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5148,7 +5148,8 @@ static int handle_vmcall(struct kvm_vcpu *vcpu)
 
 static int handle_invd(struct kvm_vcpu *vcpu)
 {
-   return kvm_emulate_instruction(vcpu, 0);
+   /* Treat an INVD instruction as a NOP and just skip it. */
+   return kvm_skip_emulated_instruction(vcpu);
 }
 
 static int handle_invlpg(struct kvm_vcpu *vcpu)
-- 
2.28.0



[PATCH v2 0/2] INVD intercept change to skip instruction

2020-09-24 Thread Tom Lendacky
From: Tom Lendacky 

This series updates the INVD intercept support for both SVM and VMX to
skip the instruction rather than emulating it, since emulation of this
instruction is just a NOP.

For SVM, it requires creating a dedicated INVD intercept routine that
invokes kvm_skip_emulated_instruction(). The current support uses the
common emulate_on_interception() routine, which does not work for SEV
guests, and so a Fixes: tag is added.

For VMX, which already has a dedicated INVD intercept routine, it changes
kvm_emulate_instruction() into a call to kvm_skip_emulated_instruction().

Tom Lendacky (2):
  KVM: SVM: Add a dedicated INVD intercept routine
  KVM: VMX: Do not perform emulation for INVD intercept

 arch/x86/kvm/svm/svm.c | 8 +++-
 arch/x86/kvm/vmx/vmx.c | 3 ++-
 2 files changed, 9 insertions(+), 2 deletions(-)

-- 
2.28.0



Re: [PATCH] KVM: SVM: Add a dedicated INVD intercept routine

2020-09-24 Thread Tom Lendacky

On 9/24/20 1:51 AM, Paolo Bonzini wrote:

On 23/09/20 22:40, Tom Lendacky wrote:

+static int invd_interception(struct vcpu_svm *svm)
+{
+   /*
+* Can't do emulation on an SEV guest and INVD is emulated
+* as a NOP, so just skip the instruction.
+*/
+   return (sev_guest(svm->vcpu.kvm))
+   ? kvm_skip_emulated_instruction(&svm->vcpu)
+   : kvm_emulate_instruction(&svm->vcpu, 0);


Is there any reason not to do kvm_skip_emulated_instruction() for both SEV
and legacy?  VMX has the same odd kvm_emulate_instruction() call, but AFAICT
that's completely unecessary, i.e. VMX can also convert to a straight skip.


You could, I just figured I'd leave the legacy behavior just in case. Not
that I can think of a reason that behavior would ever change.


Yeah, let's do skip for both SVM and VMX.


Ok, I'll submit a two patch series to change SVM and VMX. I'll do two 
patches because of the fixes tag to get the SVM fix back to stable. But, 
if you would prefer a single patch, let me know.


Thanks,
Tom



Paolo



Re: [PATCH] KVM: SVM: Add a dedicated INVD intercept routine

2020-09-23 Thread Tom Lendacky
On 9/23/20 3:32 PM, Sean Christopherson wrote:
> On Wed, Sep 23, 2020 at 03:27:39PM -0500, Tom Lendacky wrote:
>> From: Tom Lendacky 
>>
>> The INVD instruction intercept performs emulation. Emulation can't be done
>> on an SEV guest because the guest memory is encrypted.
>>
>> Provide a dedicated intercept routine for the INVD intercept. Within this
>> intercept routine just skip the instruction for an SEV guest, since it is
>> emulated as a NOP anyway.
>>
>> Fixes: 1654efcbc431 ("KVM: SVM: Add KVM_SEV_INIT command")
>> Signed-off-by: Tom Lendacky 
>> ---
>>  arch/x86/kvm/svm/svm.c | 13 -
>>  1 file changed, 12 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
>> index c91acabf18d0..332ec4425d89 100644
>> --- a/arch/x86/kvm/svm/svm.c
>> +++ b/arch/x86/kvm/svm/svm.c
>> @@ -2183,6 +2183,17 @@ static int iret_interception(struct vcpu_svm *svm)
>>  return 1;
>>  }
>>  
>> +static int invd_interception(struct vcpu_svm *svm)
>> +{
>> +/*
>> + * Can't do emulation on an SEV guest and INVD is emulated
>> + * as a NOP, so just skip the instruction.
>> + */
>> +return (sev_guest(svm->vcpu.kvm))
>> +? kvm_skip_emulated_instruction(&svm->vcpu)
>> +: kvm_emulate_instruction(&svm->vcpu, 0);
> 
> Is there any reason not to do kvm_skip_emulated_instruction() for both SEV
> and legacy?  VMX has the same odd kvm_emulate_instruction() call, but AFAICT
> that's completely unecessary, i.e. VMX can also convert to a straight skip.

You could, I just figured I'd leave the legacy behavior just in case. Not
that I can think of a reason that behavior would ever change.

Thanks,
Tom

> 
>> +}
>> +
>>  static int invlpg_interception(struct vcpu_svm *svm)
>>  {
>>  if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
>> @@ -2774,7 +2785,7 @@ static int (*const svm_exit_handlers[])(struct 
>> vcpu_svm *svm) = {
>>  [SVM_EXIT_RDPMC]= rdpmc_interception,
>>  [SVM_EXIT_CPUID]= cpuid_interception,
>>  [SVM_EXIT_IRET] = iret_interception,
>> -[SVM_EXIT_INVD] = emulate_on_interception,
>> +[SVM_EXIT_INVD] = invd_interception,
>>  [SVM_EXIT_PAUSE]= pause_interception,
>>  [SVM_EXIT_HLT]  = halt_interception,
>>  [SVM_EXIT_INVLPG]   = invlpg_interception,
>> -- 
>> 2.28.0
>>


[PATCH] KVM: SVM: Add a dedicated INVD intercept routine

2020-09-23 Thread Tom Lendacky
From: Tom Lendacky 

The INVD instruction intercept performs emulation. Emulation can't be done
on an SEV guest because the guest memory is encrypted.

Provide a dedicated intercept routine for the INVD intercept. Within this
intercept routine just skip the instruction for an SEV guest, since it is
emulated as a NOP anyway.

Fixes: 1654efcbc431 ("KVM: SVM: Add KVM_SEV_INIT command")
Signed-off-by: Tom Lendacky 
---
 arch/x86/kvm/svm/svm.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c91acabf18d0..332ec4425d89 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2183,6 +2183,17 @@ static int iret_interception(struct vcpu_svm *svm)
return 1;
 }
 
+static int invd_interception(struct vcpu_svm *svm)
+{
+   /*
+* Can't do emulation on an SEV guest and INVD is emulated
+* as a NOP, so just skip the instruction.
+*/
+   return (sev_guest(svm->vcpu.kvm))
+   ? kvm_skip_emulated_instruction(&svm->vcpu)
+   : kvm_emulate_instruction(&svm->vcpu, 0);
+}
+
 static int invlpg_interception(struct vcpu_svm *svm)
 {
if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
@@ -2774,7 +2785,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_RDPMC]= rdpmc_interception,
[SVM_EXIT_CPUID]= cpuid_interception,
[SVM_EXIT_IRET] = iret_interception,
-   [SVM_EXIT_INVD] = emulate_on_interception,
+   [SVM_EXIT_INVD] = invd_interception,
[SVM_EXIT_PAUSE]= pause_interception,
[SVM_EXIT_HLT]  = halt_interception,
[SVM_EXIT_INVLPG]   = invlpg_interception,
-- 
2.28.0



Re: [PATCH] crypto: ccp - fix error handling

2020-09-22 Thread Tom Lendacky
On 9/21/20 6:34 AM, Pavel Machek wrote:
> Fix resource leak in error handling.

Does it need a Fixes: tag?

Thanks,
Tom

> 
> Signed-off-by: Pavel Machek (CIP) 
> 
> diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
> index bd270e66185e..40869ea1ed20 100644
> --- a/drivers/crypto/ccp/ccp-ops.c
> +++ b/drivers/crypto/ccp/ccp-ops.c
> @@ -1744,7 +1744,7 @@ ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct 
> ccp_cmd *cmd)
>   break;
>   default:
>   ret = -EINVAL;
> - goto e_ctx;
> + goto e_data;
>   }
>   } else {
>   /* Stash the context */
> 


Re: [RFC PATCH 08/35] KVM: SVM: Prevent debugging under SEV-ES

2020-09-17 Thread Tom Lendacky

On 9/16/20 5:50 PM, Sean Christopherson wrote:

On Wed, Sep 16, 2020 at 03:27:13PM -0500, Tom Lendacky wrote:

On 9/16/20 11:49 AM, Sean Christopherson wrote:

On Wed, Sep 16, 2020 at 11:38:38AM -0500, Tom Lendacky wrote:



On 9/16/20 11:02 AM, Sean Christopherson wrote:

On Wed, Sep 16, 2020 at 10:11:10AM -0500, Tom Lendacky wrote:

On 9/15/20 3:13 PM, Tom Lendacky wrote:

On 9/15/20 11:30 AM, Sean Christopherson wrote:

I don't quite follow the "doesn't mean debugging can't be done in the future".
Does that imply that debugging could be supported for SEV-ES guests, even if
they have an encrypted VMSA?


Almost anything can be done with software. It would require a lot of
hypervisor and guest code and changes to the GHCB spec, etc. So given
that, probably just the check for arch.guest_state_protected is enough for
now. I'll just need to be sure none of the debugging paths can be taken
before the VMSA is encrypted.


So I don't think there's any guarantee that the KVM_SET_GUEST_DEBUG ioctl
couldn't be called before the VMSA is encrypted, meaning I can't check the
arch.guest_state_protected bit for that call. So if we really want to get
rid of the allow_debug() op, I'd need some other way to indicate that this
is an SEV-ES / protected state guest.


Would anything break if KVM "speculatively" set guest_state_protected before
LAUNCH_UPDATE_VMSA?  E.g. does KVM need to emulate before LAUNCH_UPDATE_VMSA?


Yes, the way the code is set up, the guest state (VMSA) is initialized in
the same way it is today (mostly) and that state is encrypted by the
LAUNCH_UPDATE_VMSA call. I check the guest_state_protected bit to decide
on whether to direct the updates to the real VMSA (before it's encrypted)
or the GHCB (that's the get_vmsa() function from patch #5).


Ah, gotcha.  Would it work to set guest_state_protected[*] from time zero,
and move vmsa_encrypted to struct vcpu_svm?  I.e. keep vmsa_encrypted, but
use it only for guiding get_vmsa() and related behavior.


It is mainly __set_sregs() that needs to know when to allow the register
writes and when not to. During guest initialization, __set_sregs is how
some of the VMSA is initialized by Qemu.


Hmm.  I assume that also means KVM_SET_REGS and KVM_GET_XCRS are also legal
before the VMSA is encrypted?  If so, then the current behavior of setting
vmsa_encrypted "late" make sense.  KVM_SET_FPU/XSAVE can be handled by not
allocating guest_fpu, i.e. they can be disallowed from time zero without
adding an SEV-ES specific check.

Which brings us back to KVM_SET_GUEST_DEBUG.  What would happen if that were
allowed prior to VMSA encryption?  If LAUNCH_UPDATE_VMSA acts as a sort of
reset, one thought would be to allow KVM_SET_GUEST_DEBUG and then sanitize
KVM's state during LAUNCH_UPDATE_VMSA.  Or perhaps even better, disallow
LAUNCH_UPDATE_VMSA if vcpu->guest_debug!=0.  That would allow using debug
capabilities up until LAUNCH_UPDATE_VMSA without adding much burden to KVM.


I think the vcpu->guest_debug check before the LAUNCH_UPDATE_VMSA would be 
good. I'll remove the allow_debug() op and replace it with the 
guest_state_protected check in its place.


Thanks,
Tom
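
[ A minimal sketch of the check discussed above, for illustration only;
  the handler name and return value are assumptions, not the posted
  patch. ]

static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_vcpu *vcpu;
	int i;

	/* Refuse to encrypt the VMSA while any vCPU still has debug enabled */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (vcpu->guest_debug)
			return -EINVAL;
	}

	/* ... build each vCPU's VMSA and issue LAUNCH_UPDATE_VMSA ... */
	return 0;
}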





Re: [RFC PATCH 08/35] KVM: SVM: Prevent debugging under SEV-ES

2020-09-16 Thread Tom Lendacky
On 9/15/20 3:13 PM, Tom Lendacky wrote:
> On 9/15/20 11:30 AM, Sean Christopherson wrote:
>> On Tue, Sep 15, 2020 at 08:37:12AM -0500, Tom Lendacky wrote:
>>> On 9/14/20 4:26 PM, Sean Christopherson wrote:
>>>> On Mon, Sep 14, 2020 at 03:15:22PM -0500, Tom Lendacky wrote:
>>>>> From: Tom Lendacky 
>>>>>
>>>>> Since the guest register state of an SEV-ES guest is encrypted, debugging
>>>>> is not supported. Update the code to prevent guest debugging when the
>>>>> guest is an SEV-ES guest. This includes adding a callable function that
>>>>> is used to determine if the guest supports being debugged.
>>>>>
>>>>> Signed-off-by: Tom Lendacky 
>>>>> ---
>>>>>  arch/x86/include/asm/kvm_host.h |  2 ++
>>>>>  arch/x86/kvm/svm/svm.c  | 16 
>>>>>  arch/x86/kvm/vmx/vmx.c  |  7 +++
>>>>>  arch/x86/kvm/x86.c  |  3 +++
>>>>>  4 files changed, 28 insertions(+)
>>>>>
>>>>> diff --git a/arch/x86/include/asm/kvm_host.h 
>>>>> b/arch/x86/include/asm/kvm_host.h
>>>>> index c900992701d6..3e2a3d2a8ba8 100644
>>>>> --- a/arch/x86/include/asm/kvm_host.h
>>>>> +++ b/arch/x86/include/asm/kvm_host.h
>>>>> @@ -1234,6 +1234,8 @@ struct kvm_x86_ops {
>>>>>   void (*reg_read_override)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
>>>>>   void (*reg_write_override)(struct kvm_vcpu *vcpu, enum kvm_reg reg,
>>>>>  unsigned long val);
>>>>> +
>>>>> + bool (*allow_debug)(struct kvm *kvm);
>>>>
>>>> Why add both allow_debug() and vmsa_encrypted?  I assume there are 
>>>> scenarios
>>>> where allow_debug() != vmsa_encrypted?  E.g. is there a debug mode for 
>>>> SEV-ES
>>>> where the VMSA is not encrypted, but KVM (ironically) can't intercept #DBs 
>>>> or
>>>> something?
>>>
>>> No, once the guest has had LAUNCH_UPDATE_VMSA run against the vCPUs, then
>>> the vCPU states are all encrypted. But that doesn't mean that debugging
>>> can't be done in the future.
>>
>> I don't quite follow the "doesn't mean debugging can't be done in the 
>> future".
>> Does that imply that debugging could be supported for SEV-ES guests, even if
>> they have an encrypted VMSA?
> 
> Almost anything can be done with software. It would require a lot of
> hypervisor and guest code and changes to the GHCB spec, etc. So given
> that, probably just the check for arch.guest_state_protected is enough for
> now. I'll just need to be sure none of the debugging paths can be taken
> before the VMSA is encrypted.

So I don't think there's any guarantee that the KVM_SET_GUEST_DEBUG ioctl
couldn't be called before the VMSA is encrypted, meaning I can't check the
arch.guest_state_protected bit for that call. So if we really want to get
rid of the allow_debug() op, I'd need some other way to indicate that this
is an SEV-ES / protected state guest.

How are you planning on blocking this ioctl for TDX? Would the
arch.guest_state_protected bit be set earlier than is done for SEV-ES?

Thanks,
Tom

> 
> Thanks,
> Tom
> 
>>


Re: [RFC PATCH 11/35] KVM: SVM: Prepare for SEV-ES exit handling in the sev.c file

2020-09-16 Thread Tom Lendacky
On 9/15/20 12:21 PM, Sean Christopherson wrote:
> On Mon, Sep 14, 2020 at 03:15:25PM -0500, Tom Lendacky wrote:
>> From: Tom Lendacky 
>>
>> This is a pre-patch to consolidate some exit handling code into callable
>> functions. Follow-on patches for SEV-ES exit handling will then be able
>> to use them from the sev.c file.
>>
>> Signed-off-by: Tom Lendacky 
>> ---
>>  arch/x86/kvm/svm/svm.c | 64 +-
>>  1 file changed, 38 insertions(+), 26 deletions(-)
>>
>> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
>> index f9daa40b3cfc..6a4cc535ba77 100644
>> --- a/arch/x86/kvm/svm/svm.c
>> +++ b/arch/x86/kvm/svm/svm.c
>> @@ -3047,6 +3047,43 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
>> "excp_to:", save->last_excp_to);
>>  }
>>  
>> +static bool svm_is_supported_exit(struct kvm_vcpu *vcpu, u64 exit_code)
>> +{
>> +if (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
>> +svm_exit_handlers[exit_code])
>> +return true;
>> +
>> +vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code);
>> +dump_vmcb(vcpu);
>> +vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
>> +vcpu->run->internal.suberror = 
>> KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
>> +vcpu->run->internal.ndata = 2;
>> +vcpu->run->internal.data[0] = exit_code;
>> +vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
> 
> Based on the name "is_supported_exit", I would prefer that vcpu->run be filled
> in by the caller.  Looking at the below code where svm_is_supported_exit() is
> checked, without diving into the implementation of the helper it's not at all
> clear that vcpu->run is filled.
> 
> Assuming svm_invoke_exit_handler() is the only user, it probably makes sense 
> to
> fill vcpu->run in the caller.  If there will be multiple callers, then it'd be
> nice to rename svm_is_supported_exit() to e.g. svm_handle_invalid_exit() or 
> so.

Will change.

> 
>> +
>> +return false;
>> +}
>> +
>> +static int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code)
>> +{
>> +if (!svm_is_supported_exit(&svm->vcpu, exit_code))
>> +return 0;
>> +
>> +#ifdef CONFIG_RETPOLINE
>> +if (exit_code == SVM_EXIT_MSR)
>> +return msr_interception(svm);
>> +else if (exit_code == SVM_EXIT_VINTR)
>> +return interrupt_window_interception(svm);
>> +else if (exit_code == SVM_EXIT_INTR)
>> +return intr_interception(svm);
>> +else if (exit_code == SVM_EXIT_HLT)
>> +return halt_interception(svm);
>> +else if (exit_code == SVM_EXIT_NPF)
>> +return npf_interception(svm);
>> +#endif
>> +return svm_exit_handlers[exit_code](svm);
> 
> Now I see why kvm_skip_emulated_instruction() is bailing on SEV-ES guests,
> #VMGEXIT simply routes through the legacy exit handlers.  Which totally makes
> sense from a code reuse perspective, but the lack of sanity checking with that
> approach is undesirable, e.g. I assume there are a big pile of exit codes that
> are flat out unsupported for SEV-ES, and ideally KVM would yell loudly if it
> tries to do skip_emulated_instruction() for a protected guest.
> 
> Rather than route through the legacy handlers, I suspect it will be more
> desirable in the long run to have a separate path for #VMGEXIT, i.e. a path
> that does the back half of emulation (the front half being the "fetch" phase).

Except there are some automatic exits (AE events) that don't go through
VMGEXIT, and KVM would need to be sure the RIP isn't updated for those.
I can audit the AE events and see what's possible.

Additionally, maybe just ensuring that kvm_x86_ops.get_rflags() doesn't
return something with the TF flag set eliminates the need for the change
to kvm_skip_emulated_instruction().

> 
> The biggest downsides would be code duplication and ongoing maintenance.  Our
> current approach for TDX is to eat that overhead, because it's not _that_ much
> code.  But, maybe there's a middle ground, e.g. using the existing flows but
> having them skip (heh) kvm_skip_emulated_instruction() for protected guests.
> 
> There are a few flows, e.g. MMIO emulation, that will need dedicated
> implementations, but I'm 99% certain we can put those in x86.c and share them
> between SEV-ES and TDX.
>  
> One question that will impact KVM's options: can KVM inject exceptions to
> SEV-ES guests?  E.g. if the

Re: [RFC PATCH 08/35] KVM: SVM: Prevent debugging under SEV-ES

2020-09-16 Thread Tom Lendacky
On 9/16/20 11:49 AM, Sean Christopherson wrote:
> On Wed, Sep 16, 2020 at 11:38:38AM -0500, Tom Lendacky wrote:
>>
>>
>> On 9/16/20 11:02 AM, Sean Christopherson wrote:
>>> On Wed, Sep 16, 2020 at 10:11:10AM -0500, Tom Lendacky wrote:
>>>> On 9/15/20 3:13 PM, Tom Lendacky wrote:
>>>>> On 9/15/20 11:30 AM, Sean Christopherson wrote:
>>>>>> I don't quite follow the "doesn't mean debugging can't be done in the 
>>>>>> future".
>>>>>> Does that imply that debugging could be supported for SEV-ES guests, 
>>>>>> even if
>>>>>> they have an encrypted VMSA?
>>>>>
>>>>> Almost anything can be done with software. It would require a lot of
>>>>> hypervisor and guest code and changes to the GHCB spec, etc. So given
>>>>> that, probably just the check for arch.guest_state_protected is enough for
>>>>> now. I'll just need to be sure none of the debugging paths can be taken
>>>>> before the VMSA is encrypted.
>>>>
>>>> So I don't think there's any guarantee that the KVM_SET_GUEST_DEBUG ioctl
>>>> couldn't be called before the VMSA is encrypted, meaning I can't check the
>>>> arch.guest_state_protected bit for that call. So if we really want to get
>>>> rid of the allow_debug() op, I'd need some other way to indicate that this
>>>> is an SEV-ES / protected state guest.
>>>
>>> Would anything break if KVM "speculatively" set guest_state_protected before
>>> LAUNCH_UPDATE_VMSA?  E.g. does KVM need to emulate before 
>>> LAUNCH_UPDATE_VMSA?
>>
>> Yes, the way the code is set up, the guest state (VMSA) is initialized in
>> the same way it is today (mostly) and that state is encrypted by the
>> LAUNCH_UPDATE_VMSA call. I check the guest_state_protected bit to decide
>> on whether to direct the updates to the real VMSA (before it's encrypted)
>> or the GHCB (that's the get_vmsa() function from patch #5).
> 
> Ah, gotcha.  Would it work to set guest_state_protected[*] from time zero,
> and move vmsa_encrypted to struct vcpu_svm?  I.e. keep vmsa_encrypted, but
> use it only for guiding get_vmsa() and related behavior.

It is mainly __set_sregs() that needs to know when to allow the register
writes and when not to. During guest initialization, __set_sregs is how
some of the VMSA is initialized by Qemu.

Thanks,
Tom

> 


Re: [RFC PATCH 08/35] KVM: SVM: Prevent debugging under SEV-ES

2020-09-16 Thread Tom Lendacky



On 9/16/20 11:02 AM, Sean Christopherson wrote:
> On Wed, Sep 16, 2020 at 10:11:10AM -0500, Tom Lendacky wrote:
>> On 9/15/20 3:13 PM, Tom Lendacky wrote:
>>> On 9/15/20 11:30 AM, Sean Christopherson wrote:
>>>> I don't quite follow the "doesn't mean debugging can't be done in the 
>>>> future".
>>>> Does that imply that debugging could be supported for SEV-ES guests, even 
>>>> if
>>>> they have an encrypted VMSA?
>>>
>>> Almost anything can be done with software. It would require a lot of
>>> hypervisor and guest code and changes to the GHCB spec, etc. So given
>>> that, probably just the check for arch.guest_state_protected is enough for
>>> now. I'll just need to be sure none of the debugging paths can be taken
>>> before the VMSA is encrypted.
>>
>> So I don't think there's any guarantee that the KVM_SET_GUEST_DEBUG ioctl
>> couldn't be called before the VMSA is encrypted, meaning I can't check the
>> arch.guest_state_protected bit for that call. So if we really want to get
>> rid of the allow_debug() op, I'd need some other way to indicate that this
>> is an SEV-ES / protected state guest.
> 
> Would anything break if KVM "speculatively" set guest_state_protected before
> LAUNCH_UPDATE_VMSA?  E.g. does KVM need to emulate before LAUNCH_UPDATE_VMSA?

Yes, the way the code is set up, the guest state (VMSA) is initialized in
the same way it is today (mostly) and that state is encrypted by the
LAUNCH_UPDATE_VMSA call. I check the guest_state_protected bit to decide
on whether to direct the updates to the real VMSA (before it's encrypted)
or the GHCB (that's the get_vmsa() function from patch #5).

Thanks,
Tom

> 
>> How are you planning on blocking this ioctl for TDX? Would the
>> arch.guest_state_protected bit be set earlier than is done for SEV-ES?
> 
> Yep, guest_state_protected is set from time zero (kvm_x86_ops.vm_init) as
> guest state is encrypted/inaccessible from the get go.  The flag actually
> gets turned off for debuggable TDX guests, but that's also forced to happen
> before the KVM_RUN can be invoked (TDX architecture) and is a one-time
> configuration, i.e. userspace can flip the switch exactly once, and only at
> a very specific point in time.
> 


Re: [RFC PATCH 05/35] KVM: SVM: Add initial support for SEV-ES GHCB access to KVM

2020-09-16 Thread Tom Lendacky
On 9/15/20 11:28 AM, Sean Christopherson wrote:
> On Tue, Sep 15, 2020 at 08:24:22AM -0500, Tom Lendacky wrote:
>> On 9/14/20 3:58 PM, Sean Christopherson wrote:
>>>> @@ -79,6 +88,9 @@ static inline void kvm_register_write(struct kvm_vcpu 
>>>> *vcpu, int reg,
>>>>if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
>>>>return;
>>>>  
>>>> +  if (kvm_x86_ops.reg_write_override)
>>>> +  kvm_x86_ops.reg_write_override(vcpu, reg, val);
>>>
>>>
>>> There has to be a more optimal approach for propagating registers between
>>> vcpu->arch.regs and the VMSA than adding a per-GPR hook.  Why not simply
>>> copy the entire set of registers to/from the VMSA on every exit and entry?
>>> AFAICT, valid_bits is only used in the read path, and KVM doesn't do 
>>> anything
>>> sophistated when it hits a !valid_bits reads.
>>
>> That would probably be ok. And actually, the code might be able to just
>> check the GHCB valid bitmap for valid regs on exit, copy them and then
>> clear the bitmap. The write code could check if vmsa_encrypted is set and
>> then set a "valid" bit for the reg that could be used to set regs on entry.
>>
>> I'm not sure if turning kvm_vcpu_arch.regs into a struct and adding a
>> valid bit would be overkill or not.
> 
> KVM already has space in regs_avail and regs_dirty for GPRs, they're just not
> used by the get/set helpers because they're always loaded/stored for both SVM
> and VMX.
> 
> I assume nothing will break if KVM "writes" random GPRs in the VMSA?  I can't
> see how the guest would achieve any level of security if it wantonly consumes
> GPRs, i.e. it's the guest's responsibility to consume only the relevant GPRs.

Right, the guest should only read the registers that it is expecting to be
provided by the hypervisor as set forth in the GHCB spec. It shouldn't
load any other registers that the hypervisor provides. The Linux SEV-ES
guest support follows this model and will only load the registers that are
specified via the GHCB spec for a particular NAE event, ignoring anything
else provided.

> 
> If that holds true, than avoiding the copying isn't functionally necessary, 
> and
> is really just a performance optimization.  One potentially crazy idea would 
> be
> to change vcpu->arch.regs to be a pointer (defaults a __regs array), and then
> have SEV-ES switch it to point directly at the VMSA array (I think the layout
> is identical for x86-64?).

That would be nice, but it isn't quite laid out like that. Before SEV-ES
support, RAX and RSP were the only GPRs saved. With the arrival of SEV-ES,
the remaining registers were added to the VMSA, but a number of bytes
after RAX and RSP. So right now, there are reserved areas where RAX and
RSP would have been at the new register block in the VMSA (see offset
0x300 in the VMSA layout of the APM volume 2,
https://www.amd.com/system/files/TechDocs/24593.pdf).

I might be able to move the RAX and RSP values before the VMSA is
encrypted (or the GHCB returned), assuming those fields would stay
reserved, but I don't think that can be guaranteed.

Let me see if I can put something together using regs_avail and regs_dirty.
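
[ A rough sketch of that idea, for illustration only (not from the
  thread): copy just the registers the guest marked valid in the GHCB
  into vcpu->arch.regs on exit, then clear the bitmap. ]

static void example_sync_from_ghcb(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct ghcb *ghcb = svm->ghcb;

	if (ghcb_rax_is_valid(ghcb))
		vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax(ghcb);
	if (ghcb_rbx_is_valid(ghcb))
		vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx(ghcb);
	if (ghcb_rcx_is_valid(ghcb))
		vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx(ghcb);
	if (ghcb_rdx_is_valid(ghcb))
		vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx(ghcb);

	/* Drop the stale valid bits before the next guest entry */
	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}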

> 
>>>> @@ -4012,6 +4052,99 @@ static bool svm_apic_init_signal_blocked(struct 
>>>> kvm_vcpu *vcpu)
>>>>   (svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
>>>>  }
>>>>  
>>>> +/*
>>>> + * These return values represent the offset in quad words within the VM 
>>>> save
>>>> + * area. This allows them to be accessed by casting the save area to a u64
>>>> + * array.
>>>> + */
>>>> +#define VMSA_REG_ENTRY(_field) (offsetof(struct vmcb_save_area, 
>>>> _field) / sizeof(u64))
>>>> +#define VMSA_REG_UNDEF VMSA_REG_ENTRY(valid_bitmap)
>>>> +static inline unsigned int vcpu_to_vmsa_entry(enum kvm_reg reg)
>>>> +{
>>>> +  switch (reg) {
>>>> +  case VCPU_REGS_RAX: return VMSA_REG_ENTRY(rax);
>>>> +  case VCPU_REGS_RBX: return VMSA_REG_ENTRY(rbx);
>>>> +  case VCPU_REGS_RCX: return VMSA_REG_ENTRY(rcx);
>>>> +  case VCPU_REGS_RDX: return VMSA_REG_ENTRY(rdx);
>>>> +  case VCPU_REGS_RSP: return VMSA_REG_ENTRY(rsp);
>>>> +  case VCPU_REGS_RBP: return VMSA_REG_ENTRY(rbp);
>>>> +  case VCPU_REGS_RSI: return VMSA_REG_ENTRY(rsi);
>>>> +  case VCPU_REGS_RDI: return VMSA_REG_ENTRY(rdi);
>>>> +#ifd

Re: [RFC PATCH 05/35] KVM: SVM: Add initial support for SEV-ES GHCB access to KVM

2020-09-15 Thread Tom Lendacky
On 9/14/20 3:58 PM, Sean Christopherson wrote:
> On Mon, Sep 14, 2020 at 03:15:19PM -0500, Tom Lendacky wrote:
>> From: Tom Lendacky 
>>
>> Provide initial support for accessing the GHCB when needing to access
>> registers for an SEV-ES guest. The support consists of:
>>
>>   - Accessing the GHCB instead of the VMSA when reading and writing
>> guest registers (after the VMSA has been encrypted).
>>   - Creating register access override functions for reading and writing
>> guest registers from the common KVM support.
>>   - Allocating pages for the VMSA and GHCB when creating each vCPU
>> - The VMSA page holds the encrypted VMSA for the vCPU
>> - The GHCB page is used to hold a copy of the guest GHCB during
>>   VMGEXIT processing.
>>
>> Signed-off-by: Tom Lendacky 
>> ---
>>  arch/x86/include/asm/kvm_host.h  |   7 ++
>>  arch/x86/include/asm/msr-index.h |   1 +
>>  arch/x86/kvm/kvm_cache_regs.h|  30 +--
>>  arch/x86/kvm/svm/svm.c   | 138 ++-
>>  arch/x86/kvm/svm/svm.h   |  65 ++-
>>  5 files changed, 230 insertions(+), 11 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 5303dbc5c9bc..c900992701d6 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -788,6 +788,9 @@ struct kvm_vcpu_arch {
>>  
>>  /* AMD MSRC001_0015 Hardware Configuration */
>>  u64 msr_hwcr;
>> +
>> +/* SEV-ES support */
>> +bool vmsa_encrypted;
> 
> 
> Peeking a little into the future, Intel needs a very similar flag for TDX[*].
> At a glance throughout the series, I don't see anything that is super SEV-ES
> specific, so I think we could do s/vmsa_encrypted/guest_state_protected (or
> something along those lines).

Yup, I can do that.

> 
> [*] https://software.intel.com/content/www/us/en/develop/articles/intel-trust-domain-extensions.html
> 
>>  };
>>  
>>  struct kvm_lpage_info {
>> @@ -1227,6 +1230,10 @@ struct kvm_x86_ops {
>>  int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
>>  
>>  void (*migrate_timers)(struct kvm_vcpu *vcpu);
>> +
>> +void (*reg_read_override)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
>> +void (*reg_write_override)(struct kvm_vcpu *vcpu, enum kvm_reg reg,
>> +   unsigned long val);
>>  };
>>  
>>  struct kvm_x86_nested_ops {
>> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
>> index 249a4147c4b2..16f5b20bb099 100644
>> --- a/arch/x86/include/asm/msr-index.h
>> +++ b/arch/x86/include/asm/msr-index.h
>> @@ -466,6 +466,7 @@
>>  #define MSR_AMD64_IBSBRTARGET   0xc001103b
>>  #define MSR_AMD64_IBSOPDATA4 0xc001103d
>>  #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
>> +#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e
>>  #define MSR_AMD64_SEV_ES_GHCB   0xc0010130
>>  #define MSR_AMD64_SEV   0xc0010131
>>  #define MSR_AMD64_SEV_ENABLED_BIT   0
>> diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
>> index cfe83d4ae625..e87eb90999d5 100644
>> --- a/arch/x86/kvm/kvm_cache_regs.h
>> +++ b/arch/x86/kvm/kvm_cache_regs.h
>> @@ -9,15 +9,21 @@
>>  (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR  \
>>   | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE | X86_CR4_TSD)
>>  
>> -#define BUILD_KVM_GPR_ACCESSORS(lname, uname)			      \
>> -static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
>> -{									      \
>> -	return vcpu->arch.regs[VCPU_REGS_##uname];			      \
>> -}									      \
>> -static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu,      \
>> -						unsigned long val)	      \
>> -{ 

Re: [RFC PATCH 26/35] KVM: SVM: Guest FPU state save/restore not needed for SEV-ES guest

2020-09-15 Thread Tom Lendacky
On 9/14/20 4:39 PM, Sean Christopherson wrote:
> On Mon, Sep 14, 2020 at 03:15:40PM -0500, Tom Lendacky wrote:
>> From: Tom Lendacky 
>>
>> The guest FPU is automatically restored on VMRUN and saved on VMEXIT by
>> the hardware, so there is no reason to do this in KVM.
> 
> I assume hardware has its own buffer?  If so, a better approach would be to
> not allocate arch.guest_fpu in the first place, and then rework KVM to key
> off !guest_fpu.

Yup, let me look into that.
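
A rough fragment of what keying off guest_fpu could look like in
kvm_load_guest_fpu() (just a sketch; the "no guest_fpu allocated" convention
is your suggestion above, not existing code):

	if (vcpu->arch.guest_fpu)
		/* PKRU is separately restored in kvm_x86_ops.run. */
		__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
					~XFEATURE_MASK_PKRU);

and similarly kvm_put_guest_fpu() would skip kvm_save_current_fpu() when
guest_fpu is NULL.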

Thanks,
Tom

> 
>> Signed-off-by: Tom Lendacky 
>> ---
>>  arch/x86/kvm/svm/svm.c |  8 ++--
>>  arch/x86/kvm/x86.c | 18 ++
>>  2 files changed, 20 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
>> index b35c2de1130c..48699c41b62a 100644
>> --- a/arch/x86/kvm/svm/svm.c
>> +++ b/arch/x86/kvm/svm/svm.c
>> @@ -3682,7 +3682,9 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
>>  svm_set_dr6(svm, DR6_FIXED_1 | DR6_RTM);
>>  
>>  clgi();
>> -kvm_load_guest_xsave_state(vcpu);
>> +
>> +if (!sev_es_guest(svm->vcpu.kvm))
>> +kvm_load_guest_xsave_state(vcpu);
>>  
>>  if (lapic_in_kernel(vcpu) &&
>>  vcpu->arch.apic->lapic_timer.timer_advance_ns)
>> @@ -3728,7 +3730,9 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
>>  if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
>>  kvm_before_interrupt(&svm->vcpu);
>>  
>> -kvm_load_host_xsave_state(vcpu);
>> +if (!sev_es_guest(svm->vcpu.kvm))
>> +kvm_load_host_xsave_state(vcpu);
>> +
>>  stgi();
>>  
>>  /* Any pending NMI will happen here */
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 76efe70cd635..a53e24c1c5d1 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -8896,9 +8896,14 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
>>  
>>  kvm_save_current_fpu(vcpu->arch.user_fpu);
>>  
>> -/* PKRU is separately restored in kvm_x86_ops.run.  */
>> -__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
>> -~XFEATURE_MASK_PKRU);
>> +/*
>> + * An encrypted save area means that the guest state can't be
>> + * set by the hypervisor, so skip trying to set it.
>> + */
>> +if (!vcpu->arch.vmsa_encrypted)
>> +/* PKRU is separately restored in kvm_x86_ops.run. */
>> +__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
>> +~XFEATURE_MASK_PKRU);
>>  
>>  fpregs_mark_activate();
>>  fpregs_unlock();
>> @@ -8911,7 +8916,12 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
>>  {
>>  fpregs_lock();
>>  
>> -kvm_save_current_fpu(vcpu->arch.guest_fpu);
>> +/*
>> + * An encrypted save area means that the guest state can't be
>> + * read/saved by the hypervisor, so skip trying to save it.
>> + */
>> +if (!vcpu->arch.vmsa_encrypted)
>> +kvm_save_current_fpu(vcpu->arch.guest_fpu);
>>  
>>  copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
>>  
>> -- 
>> 2.28.0
>>


Re: [RFC PATCH 25/35] KVM: x86: Update __get_sregs() / __set_sregs() to support SEV-ES

2020-09-15 Thread Tom Lendacky
On 9/14/20 4:37 PM, Sean Christopherson wrote:
> On Mon, Sep 14, 2020 at 03:15:39PM -0500, Tom Lendacky wrote:
>> From: Tom Lendacky 
>>
>> Since many of the registers used by the SEV-ES are encrypted and cannot
>> be read or written, adjust the __get_sregs() / __set_sregs() to only get
>> or set the registers being tracked (efer, cr0, cr4 and cr8) once the VMSA
>> is encrypted.
> 
> Is there an actual use case for writing said registers after the VMSA is
> encrypted?  Assuming there's a separate "debug mode" and live migration has
> special logic, can KVM simply reject the ioctl() if guest state is protected?

Yeah, I originally had it that way but one of the folks looking at live
migration for SEV-ES thought it would be easier given the way Qemu does
things. But I think it's easy enough to batch the tracking registers into
the VMSA state that is being transferred during live migration. Let me
check that out and likely the SET ioctl() could just skip all the regs.
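
For the SET side, the simplest thing is probably an early out in
__set_sregs() once the guest state is protected, e.g. (sketch only, using
the vmsa_encrypted flag from this series):

	/* Nothing in sregs can be pushed into an encrypted VMSA. */
	if (vcpu->arch.vmsa_encrypted)
		return 0;

with the tracked values (efer, cr0, cr4, cr8) instead being carried as part
of the VMSA state transferred during live migration.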

Thanks,
Tom

> 


Re: [RFC PATCH 21/35] KVM: SVM: Add support for EFER write traps for an SEV-ES guest

2020-09-15 Thread Tom Lendacky
On 9/14/20 5:08 PM, Sean Christopherson wrote:
> On Mon, Sep 14, 2020 at 03:15:35PM -0500, Tom Lendacky wrote:
>> From: Tom Lendacky 
>>
>> For SEV-ES guests, the interception of EFER write access is not
>> recommended. EFER interception occurs prior to EFER being modified and
>> the hypervisor is unable to modify EFER itself because the register is
>> located in the encrypted register state.
>>
>> SEV-ES guests introduce a new EFER write trap. This trap provides
>> intercept support of an EFER write after it has been modified. The new
>> EFER value is provided in the VMCB EXITINFO1 field, allowing the
>> hypervisor to track the setting of the guest EFER.
>>
>> Add support to track the value of the guest EFER value using the EFER
>> write trap so that the hypervisor understands the guest operating mode.
>>
>> Signed-off-by: Tom Lendacky 
>> ---
>>  arch/x86/include/asm/kvm_host.h |  1 +
>>  arch/x86/include/uapi/asm/svm.h |  2 ++
>>  arch/x86/kvm/svm/svm.c  | 12 
>>  arch/x86/kvm/x86.c  | 12 
>>  4 files changed, 27 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 7320a9c68a5a..b535b690eb66 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -1427,6 +1427,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
>>  int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
>>  int reason, bool has_error_code, u32 error_code);
>>  
>> +int kvm_track_efer(struct kvm_vcpu *vcpu, u64 efer);
>>  int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
>>  int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
>>  int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
>> diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
>> index 0bc3942ffdd3..ce937a242995 100644
>> --- a/arch/x86/include/uapi/asm/svm.h
>> +++ b/arch/x86/include/uapi/asm/svm.h
>> @@ -77,6 +77,7 @@
>>  #define SVM_EXIT_MWAIT_COND 0x08c
>>  #define SVM_EXIT_XSETBV 0x08d
>>  #define SVM_EXIT_RDPRU 0x08e
>> +#define SVM_EXIT_EFER_WRITE_TRAP 0x08f
>>  #define SVM_EXIT_NPF 0x400
>>  #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
>>  #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
>> @@ -183,6 +184,7 @@
>>  { SVM_EXIT_MONITOR, "monitor" }, \
>>  { SVM_EXIT_MWAIT,   "mwait" }, \
>>  { SVM_EXIT_XSETBV,  "xsetbv" }, \
>> +{ SVM_EXIT_EFER_WRITE_TRAP, "write_efer_trap" }, \
>>  { SVM_EXIT_NPF, "npf" }, \
>>  { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
>>  { SVM_EXIT_AVIC_UNACCELERATED_ACCESS,   "avic_unaccelerated_access" }, \
>> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
>> index ac64a5b128b2..ac467225a51d 100644
>> --- a/arch/x86/kvm/svm/svm.c
>> +++ b/arch/x86/kvm/svm/svm.c
>> @@ -2466,6 +2466,17 @@ static int cr8_write_interception(struct vcpu_svm *svm)
>>  return 0;
>>  }
>>  
>> +static int efer_trap(struct vcpu_svm *svm)
>> +{
>> +int ret;
>> +
>> +ret = kvm_track_efer(&svm->vcpu, svm->vmcb->control.exit_info_1);
>> +if (ret)
> 
> Shouldn't this be a WARN or something?  E.g. KVM thinks the WRMSR has faulted,
> while it obviously hasn't, which means KVM's internal model is now out of 
> sync.

Makes sense, I can add something here.
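
Something like this, perhaps (sketch): warn if the tracking update fails,
since the hardware has already completed the EFER write and KVM's model
would otherwise be out of sync:

	ret = kvm_track_efer(&svm->vcpu, svm->vmcb->control.exit_info_1);
	if (WARN(ret, "unsupported EFER write trap value"))
		return ret;

	return kvm_complete_insn_gp(&svm->vcpu, 0);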

> 
>> +return ret;
>> +
>> +return kvm_complete_insn_gp(&svm->vcpu, 0);
>> +}
>> +
>>  static int svm_get_msr_feature(struct kvm_msr_entry *msr)
>>  {
>>  msr->data = 0;
>> @@ -2944,6 +2955,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
>>  [SVM_EXIT_MWAIT]= mwait_interception,
>>  [SVM_EXIT_XSETBV]   = xsetbv_interception,
>>  [SVM_EXIT_RDPRU]= rdpru_interception,
>> +[SVM_EXIT_EFER_WRITE_TRAP]  = efer_trap,
>>  [SVM_EXIT_NPF]  = npf_interception,
>>  [SVM_EXIT_RSM]  = rsm_interception,
>>  [SVM_EXIT_AVIC_INCOMPLETE_IPI]  = avic_incomplete_ipi_interception,
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 674719d801d2.

Re: [RFC PATCH 22/35] KVM: SVM: Add support for CR0 write traps for an SEV-ES guest

2020-09-15 Thread Tom Lendacky
On 9/14/20 5:13 PM, Sean Christopherson wrote:
> On Mon, Sep 14, 2020 at 03:15:36PM -0500, Tom Lendacky wrote:
>> From: Tom Lendacky 
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index b65bd0c986d4..6f5988c305e1 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -799,11 +799,29 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
>>  }
>>  EXPORT_SYMBOL_GPL(pdptrs_changed);
>>  
>> +static void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0,
>> + unsigned long cr0)
> 
> What about using __kvm_set_cr*() instead of kvm_post_set_cr*()?  That would
> show that __kvm_set_cr*() is a subordinate of kvm_set_cr*(), and from the
> SVM side would provide the hint that the code is skipping the front end of
> kvm_set_cr*().

Ok, I'll change this (and the others) to __kvm_set_cr* and export them.

> 
>> +{
>> +unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
>> +
>> +if ((cr0 ^ old_cr0) & X86_CR0_PG) {
>> +kvm_clear_async_pf_completion_queue(vcpu);
>> +kvm_async_pf_hash_reset(vcpu);
>> +}
>> +
>> +if ((cr0 ^ old_cr0) & update_bits)
>> +kvm_mmu_reset_context(vcpu);
>> +
>> +if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
>> +kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
>> +!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
>> +kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
>> +}
>> +
>>  int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
>>  {
>>  unsigned long old_cr0 = kvm_read_cr0(vcpu);
>>  unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
>> -unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
>>  
>>  cr0 |= X86_CR0_ET;
>>  
>> @@ -842,22 +860,23 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
>>  
>>  kvm_x86_ops.set_cr0(vcpu, cr0);
>>  
>> -if ((cr0 ^ old_cr0) & X86_CR0_PG) {
>> -kvm_clear_async_pf_completion_queue(vcpu);
>> -kvm_async_pf_hash_reset(vcpu);
>> -}
>> +kvm_post_set_cr0(vcpu, old_cr0, cr0);
>>  
>> -if ((cr0 ^ old_cr0) & update_bits)
>> -kvm_mmu_reset_context(vcpu);
>> +return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_set_cr0);
>>  
>> -if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
>> -kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
>> -!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
>> -kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
>> +int kvm_track_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
> 
> I really dislike the "track" terminology.  For me, using "track" as the verb
> in a function implies the function activates tracking.  But it's probably a
> moot point, because similar to EFER, I don't see any reason to put the front
> end of the emulation into x86.c.  Both getting old_cr0 and setting
> vcpu->arch.cr0 can be done in svm.c

Yup, I can move that to svm.c.

Thanks,
Tom

> 
>> +{
>> +unsigned long old_cr0 = kvm_read_cr0(vcpu);
>> +
>> +vcpu->arch.cr0 = cr0;
>> +
>> +kvm_post_set_cr0(vcpu, old_cr0, cr0);
>>  
>>  return 0;
>>  }
>> -EXPORT_SYMBOL_GPL(kvm_set_cr0);
>> +EXPORT_SYMBOL_GPL(kvm_track_cr0);
>>  
>>  void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
>>  {
>> -- 
>> 2.28.0
>>


Re: [RFC PATCH 24/35] KVM: SVM: Add support for CR8 write traps for an SEV-ES guest

2020-09-15 Thread Tom Lendacky
On 9/14/20 5:19 PM, Sean Christopherson wrote:
> On Mon, Sep 14, 2020 at 03:15:38PM -0500, Tom Lendacky wrote:
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 5e5f1e8fed3a..6e445a76b691 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -1109,6 +1109,12 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
>>  }
>>  EXPORT_SYMBOL_GPL(kvm_get_cr8);
>>  
>> +int kvm_track_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
>> +{
>> +return kvm_set_cr8(vcpu, cr8);
> 
> I'm guessing this was added to achieve consistency at the SVM call sites.
> With the previously suggested changes, kvm_track_cr8() can simply be
> dropped.

Yup.

Thanks,
Tom

> 
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_track_cr8);
>> +
>>  static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
>>  {
>>  int i;
>> -- 
>> 2.28.0
>>

