[PATCH v3 4/4] KVM: hyper-v: Advertise support for fast XMM hypercalls

2021-04-13 Thread Siddharth Chandrasekaran
Now that kvm_hv_flush_tlb() has been patched to support XMM hypercall
inputs, we can start advertising this feature to guests.

Cc: Alexander Graf 
Cc: Evgeny Iakovlev 
Signed-off-by: Siddharth Chandrasekaran 
---
 arch/x86/include/asm/hyperv-tlfs.h | 7 ++-
 arch/x86/kvm/hyperv.c  | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/hyperv-tlfs.h 
b/arch/x86/include/asm/hyperv-tlfs.h
index ee6336a54f92..597ae1142864 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -52,7 +52,7 @@
  * Support for passing hypercall input parameter block via XMM
  * registers is available
  */
-#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE  BIT(4)
+#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE   BIT(4)
 /* Support for a virtual guest idle state is available */
 #define HV_X64_GUEST_IDLE_STATE_AVAILABLE  BIT(5)
 /* Frequency MSRs available */
@@ -61,6 +61,11 @@
 #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE   BIT(10)
 /* Support for debug MSRs available */
 #define HV_FEATURE_DEBUG_MSRS_AVAILABLEBIT(11)
+/*
+ * Support for returning hypercall output block via XMM
+ * registers is available
+ */
+#define HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE  BIT(15)
 /* stimer Direct Mode is available */
 #define HV_STIMER_DIRECT_MODE_AVAILABLEBIT(19)
 
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index cd6c6f1f06a4..0f6fd7550510 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2235,6 +2235,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct 
kvm_cpuid2 *cpuid,
ent->ebx |= HV_POST_MESSAGES;
ent->ebx |= HV_SIGNAL_EVENTS;
 
+   ent->edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
 
-- 
2.17.1




Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





[PATCH v3 2/4] KVM: hyper-v: Collect hypercall params into struct

2021-04-13 Thread Siddharth Chandrasekaran
As of now there are 7 parameters (and flags) that are used in various
hyper-v hypercall handlers. There are 6 more input/output parameters
passed from XMM registers which are to be added in an upcoming patch.

To make passing arguments to the handlers more readable, capture all
these parameters into a single structure.

Cc: Alexander Graf 
Cc: Evgeny Iakovlev 
Signed-off-by: Siddharth Chandrasekaran 
---
 arch/x86/kvm/hyperv.c | 147 +++---
 1 file changed, 79 insertions(+), 68 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index f98370a39936..8a542243e1cd 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1623,7 +1623,18 @@ static __always_inline unsigned long 
*sparse_set_to_vcpu_mask(
return vcpu_bitmap;
 }
 
-static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 ingpa, u16 rep_cnt, 
bool ex)
+struct kvm_hv_hcall {
+   u64 param;
+   u64 ingpa;
+   u64 outgpa;
+   u16 code;
+   u16 rep_cnt;
+   u16 rep_idx;
+   bool fast;
+   bool rep;
+};
+
+static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, 
bool ex)
 {
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
@@ -1638,7 +1649,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 
ingpa, u16 rep_cnt, bool
bool all_cpus;
 
if (!ex) {
-   if (unlikely(kvm_read_guest(kvm, ingpa, , sizeof(flush
+   if (unlikely(kvm_read_guest(kvm, hc->ingpa, , 
sizeof(flush
return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
trace_kvm_hv_flush_tlb(flush.processor_mask,
@@ -1657,7 +1668,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 
ingpa, u16 rep_cnt, bool
all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
flush.processor_mask == 0;
} else {
-   if (unlikely(kvm_read_guest(kvm, ingpa, _ex,
+   if (unlikely(kvm_read_guest(kvm, hc->ingpa, _ex,
sizeof(flush_ex
return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
@@ -1679,8 +1690,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 
ingpa, u16 rep_cnt, bool
 
if (!all_cpus &&
kvm_read_guest(kvm,
-  ingpa + offsetof(struct hv_tlb_flush_ex,
-   hv_vp_set.bank_contents),
+  hc->ingpa + offsetof(struct hv_tlb_flush_ex,
+   
hv_vp_set.bank_contents),
   sparse_banks,
   sparse_banks_len))
return HV_STATUS_INVALID_HYPERCALL_INPUT;
@@ -1700,9 +1711,9 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 
ingpa, u16 rep_cnt, bool
NULL, vcpu_mask, _vcpu->tlb_flush);
 
 ret_success:
-   /* We always do full TLB flush, set rep_done = rep_cnt. */
+   /* We always do full TLB flush, set 'Reps completed' = 'Rep Count' */
return (u64)HV_STATUS_SUCCESS |
-   ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
+   ((u64)hc->rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
 }
 
 static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
@@ -1724,8 +1735,7 @@ static void kvm_send_ipi_to_many(struct kvm *kvm, u32 
vector,
}
 }
 
-static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, u64 ingpa, u64 outgpa,
-  bool ex, bool fast)
+static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, 
bool ex)
 {
struct kvm *kvm = vcpu->kvm;
struct hv_send_ipi_ex send_ipi_ex;
@@ -1740,25 +1750,25 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, u64 
ingpa, u64 outgpa,
bool all_cpus;
 
if (!ex) {
-   if (!fast) {
-   if (unlikely(kvm_read_guest(kvm, ingpa, _ipi,
+   if (!hc->fast) {
+   if (unlikely(kvm_read_guest(kvm, hc->ingpa, _ipi,
sizeof(send_ipi
return HV_STATUS_INVALID_HYPERCALL_INPUT;
sparse_banks[0] = send_ipi.cpu_mask;
vector = send_ipi.vector;
} else {
/* 'reserved' part of hv_send_ipi should be 0 */
-   if (unlikely(ingpa >> 32 != 0))
+   if (unlikely(hc->ingpa >> 32 != 0))
return HV_STATUS_INVALID_HYPERCALL_INPUT;
-   sparse_banks[0] = outgpa;
-   vector = (u32)ingpa;
+   sparse_banks[0] = hc->outgpa;
+   vector = (u32)hc->ingpa;
   

[PATCH v3 3/4] KVM: x86: kvm_hv_flush_tlb use inputs from XMM registers

2021-04-13 Thread Siddharth Chandrasekaran
Hyper-V supports the use of XMM registers to perform fast hypercalls.
This allows guests to take advantage of the improved performance of the
fast hypercall interface even though a hypercall may require more than
(the current maximum of) two input registers.

The XMM fast hypercall interface uses six additional XMM registers (XMM0
to XMM5) to allow the guest to pass an input parameter block of up to
112 bytes.

Add framework to read from XMM registers in kvm_hv_hypercall() and use
the additional hypercall inputs from XMM registers in kvm_hv_flush_tlb()
when possible.

Cc: Alexander Graf 
Co-developed-by: Evgeny Iakovlev 
Signed-off-by: Evgeny Iakovlev 
Signed-off-by: Siddharth Chandrasekaran 
---
 arch/x86/include/asm/hyperv-tlfs.h |  2 +
 arch/x86/kvm/hyperv.c  | 90 +++---
 2 files changed, 73 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/hyperv-tlfs.h 
b/arch/x86/include/asm/hyperv-tlfs.h
index e6cd3fee562b..ee6336a54f92 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -288,6 +288,8 @@ struct hv_tsc_emulation_status {
 #define HV_X64_MSR_TSC_REFERENCE_ENABLE0x0001
 #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
 
+/* Number of XMM registers used in hypercall input/output */
+#define HV_HYPERCALL_MAX_XMM_REGISTERS 6
 
 /* Define hypervisor message types. */
 enum hv_message_type {
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 8a542243e1cd..cd6c6f1f06a4 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -36,6 +36,7 @@
 
 #include "trace.h"
 #include "irq.h"
+#include "fpu.h"
 
 /* "Hv#1" signature */
 #define HYPERV_CPUID_SIGNATURE_EAX 0x31237648
@@ -1632,10 +1633,13 @@ struct kvm_hv_hcall {
u16 rep_idx;
bool fast;
bool rep;
+   sse128_t xmm[HV_HYPERCALL_MAX_XMM_REGISTERS];
 };
 
 static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, 
bool ex)
 {
+   int i, j;
+   gpa_t gpa;
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
struct hv_tlb_flush_ex flush_ex;
@@ -1649,8 +1653,15 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, 
struct kvm_hv_hcall *hc, bool
bool all_cpus;
 
if (!ex) {
-   if (unlikely(kvm_read_guest(kvm, hc->ingpa, , 
sizeof(flush
-   return HV_STATUS_INVALID_HYPERCALL_INPUT;
+   if (hc->fast) {
+   flush.address_space = hc->ingpa;
+   flush.flags = hc->outgpa;
+   flush.processor_mask = sse128_lo(hc->xmm[0]);
+   } else {
+   if (unlikely(kvm_read_guest(kvm, hc->ingpa,
+   , sizeof(flush
+   return HV_STATUS_INVALID_HYPERCALL_INPUT;
+   }
 
trace_kvm_hv_flush_tlb(flush.processor_mask,
   flush.address_space, flush.flags);
@@ -1668,9 +1679,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, 
struct kvm_hv_hcall *hc, bool
all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
flush.processor_mask == 0;
} else {
-   if (unlikely(kvm_read_guest(kvm, hc->ingpa, _ex,
-   sizeof(flush_ex
-   return HV_STATUS_INVALID_HYPERCALL_INPUT;
+   if (hc->fast) {
+   flush_ex.address_space = hc->ingpa;
+   flush_ex.flags = hc->outgpa;
+   memcpy(_ex.hv_vp_set,
+  >xmm[0], sizeof(hc->xmm[0]));
+   } else {
+   if (unlikely(kvm_read_guest(kvm, hc->ingpa, _ex,
+   sizeof(flush_ex
+   return HV_STATUS_INVALID_HYPERCALL_INPUT;
+   }
 
trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
  flush_ex.hv_vp_set.format,
@@ -1681,20 +1699,28 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, 
struct kvm_hv_hcall *hc, bool
all_cpus = flush_ex.hv_vp_set.format !=
HV_GENERIC_SET_SPARSE_4K;
 
-   sparse_banks_len =
-   bitmap_weight((unsigned long *)_bank_mask, 64) *
-   sizeof(sparse_banks[0]);
+   sparse_banks_len = bitmap_weight((unsigned long 
*)_bank_mask, 64);
 
if (!sparse_banks_len && !all_cpus)
goto ret_success;
 
-   if (!all_cpus &&
-   kvm_read_guest(kvm,
-  hc->ingpa + offsetof(struct hv_tlb_flush_ex,
-

[PATCH v3 1/4] KVM: x86: Move FPU register accessors into fpu.h

2021-04-13 Thread Siddharth Chandrasekaran
Hyper-v XMM fast hypercalls use XMM registers to pass input/output
parameters. To access these, hyperv.c can reuse some FPU register
accessors defined in emulator.c. Move them to a common location so both
can access them.

While at it, reorder the parameters of these accessor methods to make
them more readable.

Cc: Alexander Graf 
Cc: Evgeny Iakovlev 
Signed-off-by: Siddharth Chandrasekaran 
---
 arch/x86/kvm/emulate.c | 137 +---
 arch/x86/kvm/fpu.h | 140 +
 arch/x86/kvm/kvm_emulate.h |   3 +-
 3 files changed, 158 insertions(+), 122 deletions(-)
 create mode 100644 arch/x86/kvm/fpu.h

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index f7970ba6219f..3634c4c77fd4 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -22,7 +22,6 @@
 #include "kvm_cache_regs.h"
 #include "kvm_emulate.h"
 #include 
-#include 
 #include 
 #include 
 
@@ -1081,116 +1080,14 @@ static void fetch_register_operand(struct operand *op)
}
 }
 
-static void emulator_get_fpu(void)
-{
-   fpregs_lock();
-
-   fpregs_assert_state_consistent();
-   if (test_thread_flag(TIF_NEED_FPU_LOAD))
-   switch_fpu_return();
-}
-
-static void emulator_put_fpu(void)
-{
-   fpregs_unlock();
-}
-
-static void read_sse_reg(sse128_t *data, int reg)
-{
-   emulator_get_fpu();
-   switch (reg) {
-   case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
-   case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
-   case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
-   case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
-   case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
-   case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
-   case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
-   case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
-#ifdef CONFIG_X86_64
-   case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
-   case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
-   case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
-   case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
-   case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
-   case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
-   case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
-   case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
-#endif
-   default: BUG();
-   }
-   emulator_put_fpu();
-}
-
-static void write_sse_reg(sse128_t *data, int reg)
-{
-   emulator_get_fpu();
-   switch (reg) {
-   case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
-   case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
-   case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
-   case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
-   case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
-   case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
-   case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
-   case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
-#ifdef CONFIG_X86_64
-   case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
-   case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
-   case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
-   case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
-   case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
-   case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
-   case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
-   case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
-#endif
-   default: BUG();
-   }
-   emulator_put_fpu();
-}
-
-static void read_mmx_reg(u64 *data, int reg)
-{
-   emulator_get_fpu();
-   switch (reg) {
-   case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
-   case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
-   case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
-   case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
-   case 4: asm("movq %%mm4, %0" : "=m"(*data)); break;
-   case 5: asm("movq %%mm5, %0" : "=m&

[PATCH v3 0/4] Add support for XMM fast hypercalls

2021-04-13 Thread Siddharth Chandrasekaran
Hyper-V supports the use of XMM registers to perform fast hypercalls.
This allows guests to take advantage of the improved performance of the
fast hypercall interface even though a hypercall may require more than
(the current maximum of) two general purpose registers.

The XMM fast hypercall interface uses an additional six XMM registers
(XMM0 to XMM5) to allow the caller to pass an input parameter block of
up to 112 bytes. Hyper-V can also return data back to the guest in the
remaining XMM registers that are not used by the current hypercall.

Although the Hyper-V TLFS mentions that a guest cannot use this feature
unless the hypervisor advertises support for it, some hypercalls which
we plan on upstreaming in the future use them anyway. This patchset adds
necessary infrastructure for handling input/output via XMM registers and
patches kvm_hv_flush_tlb() to use xmm input arguments.

~ Sid.

v3:
- Remove inline for kvm_hv_hypercall_{read,write}_xmm()
- Fix typo: s/ouput/output/
- Remove sse128_t from kvm_emulate.h
- Reword comment to match TLFS wording
- Move num XMM registers macro to hyperv-tlfs.h
- Stop advertising HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE

v2:
- Remove inline for kvm_hv_hypercall_{read,write}_xmm()
- Fix typo: s/ouput/output/
- Remove sse128_t from kvm_emulate.h
- Reword comment to match TLFS wording
- Move num XMM registers macro to hyperv-tlfs.h
- Stop advertising HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE

Siddharth Chandrasekaran (4):
  KVM: x86: Move FPU register accessors into fpu.h
  KVM: hyper-v: Collect hypercall params into struct
  KVM: x86: kvm_hv_flush_tlb use inputs from XMM registers
  KVM: hyper-v: Advertise support for fast XMM hypercalls

 arch/x86/include/asm/hyperv-tlfs.h |   9 +-
 arch/x86/kvm/emulate.c | 137 +++---
 arch/x86/kvm/fpu.h | 140 ++
 arch/x86/kvm/hyperv.c  | 222 +++--
 arch/x86/kvm/kvm_emulate.h |   3 +-
 5 files changed, 309 insertions(+), 202 deletions(-)
 create mode 100644 arch/x86/kvm/fpu.h

-- 
2.17.1



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





Re: [PATCH v2 3/4] KVM: x86: kvm_hv_flush_tlb use inputs from XMM registers

2021-04-13 Thread Siddharth Chandrasekaran
On Tue, Apr 13, 2021 at 04:09:48PM +0200, Vitaly Kuznetsov wrote:
> Siddharth Chandrasekaran  writes:
> > Hyper-V supports the use of XMM registers to perform fast hypercalls.
> > This allows guests to take advantage of the improved performance of the
> > fast hypercall interface even though a hypercall may require more than
> > (the current maximum of) two input registers.
> >
> > The XMM fast hypercall interface uses six additional XMM registers (XMM0
> > to XMM5) to allow the guest to pass an input parameter block of up to
> > 112 bytes. Hyper-V can also return data back to the guest in the
> > remaining XMM registers that are not used by the current hypercall.
> >
> > Add framework to read/write to XMM registers in kvm_hv_hypercall() and
> > use the additional hypercall inputs from XMM registers in
> > kvm_hv_flush_tlb() when possible.
> >
> > Cc: Alexander Graf 
> > Co-developed-by: Evgeny Iakovlev 
> > Signed-off-by: Evgeny Iakovlev 
> > Signed-off-by: Siddharth Chandrasekaran 
> > ---
> >  arch/x86/kvm/hyperv.c | 109 ++
> >  1 file changed, 90 insertions(+), 19 deletions(-)
> >
> > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> > index 8f6babd1ea0d..1f9959aba70d 100644
> > --- a/arch/x86/kvm/hyperv.c
> > +++ b/arch/x86/kvm/hyperv.c
> > @@ -36,6 +36,7 @@
> >
> >  #include "trace.h"
> >  #include "irq.h"
> > +#include "fpu.h"
> >
> >  /* "Hv#1" signature */
> >  #define HYPERV_CPUID_SIGNATURE_EAX 0x31237648
> > @@ -1623,6 +1624,8 @@ static __always_inline unsigned long 
> > *sparse_set_to_vcpu_mask(
> >   return vcpu_bitmap;
> >  }
> >
> > +#define KVM_HV_HYPERCALL_MAX_XMM_REGISTERS  6
> 
> Nitpick: this is not KVM-specific so could probably go to 
> arch/x86/include/asm/hyperv-tlfs.h

Ack.

> > +
> >  struct kvm_hv_hcall {
> >   u64 param;
> >   u64 ingpa;
> > @@ -1632,10 +1635,14 @@ struct kvm_hv_hcall {
> >   u16 rep_idx;
> >   bool fast;
> >   bool rep;
> > + sse128_t xmm[KVM_HV_HYPERCALL_MAX_XMM_REGISTERS];
> > + bool xmm_dirty;
> >  };
> >
> >  static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall 
> > *hc, bool ex)
> >  {
> > + int i, j;
> > + gpa_t gpa;
> >   struct kvm *kvm = vcpu->kvm;
> >   struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> >   struct hv_tlb_flush_ex flush_ex;
> > @@ -1649,8 +1656,15 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, 
> > struct kvm_hv_hcall *hc, bool
> >   bool all_cpus;
> >
> >   if (!ex) {
> > - if (unlikely(kvm_read_guest(kvm, hc->ingpa, , 
> > sizeof(flush
> > - return HV_STATUS_INVALID_HYPERCALL_INPUT;
> > + if (hc->fast) {
> > + flush.address_space = hc->ingpa;
> > + flush.flags = hc->outgpa;
> > + flush.processor_mask = sse128_lo(hc->xmm[0]);
> > + } else {
> > + if (unlikely(kvm_read_guest(kvm, hc->ingpa,
> > + , sizeof(flush
> > + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> > + }
> >
> >   trace_kvm_hv_flush_tlb(flush.processor_mask,
> >  flush.address_space, flush.flags);
> > @@ -1668,9 +1682,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, 
> > struct kvm_hv_hcall *hc, bool
> >   all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
> >   flush.processor_mask == 0;
> >   } else {
> > - if (unlikely(kvm_read_guest(kvm, hc->ingpa, _ex,
> > - sizeof(flush_ex
> > - return HV_STATUS_INVALID_HYPERCALL_INPUT;
> > + if (hc->fast) {
> > + flush_ex.address_space = hc->ingpa;
> > + flush_ex.flags = hc->outgpa;
> > + memcpy(_ex.hv_vp_set,
> > +>xmm[0], sizeof(hc->xmm[0]));
> > + } else {
> > + if (unlikely(kvm_read_guest(kvm, hc->ingpa, _ex,
> > + sizeof(flush_ex
> > + return HV_STATUS_INVALID_HYPERCALL_INPUT;
> > + }
> &

Re: [PATCH v2 2/4] KVM: hyper-v: Collect hypercall params into struct

2021-04-13 Thread Siddharth Chandrasekaran
On Tue, Apr 13, 2021 at 03:53:01PM +0200, Vitaly Kuznetsov wrote:
> Siddharth Chandrasekaran  writes:
> > As of now there are 7 parameters (and flags) that are used in various
> > hyper-v hypercall handlers. There are 6 more input/output parameters
> > passed from XMM registers which are to be added in an upcoming patch.
> >
> > To make passing arguments to the handlers more readable, capture all
> > these parameters into a single structure.
> >
> > Cc: Alexander Graf 
> > Cc: Evgeny Iakovlev 
> > Signed-off-by: Siddharth Chandrasekaran 
> > ---
> >  arch/x86/kvm/hyperv.c | 147 +++---
> >  1 file changed, 79 insertions(+), 68 deletions(-)
> >
> > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> > index f98370a39936..8f6babd1ea0d 100644
> > --- a/arch/x86/kvm/hyperv.c
> > +++ b/arch/x86/kvm/hyperv.c
> > @@ -1623,7 +1623,18 @@ static __always_inline unsigned long 
> > *sparse_set_to_vcpu_mask(
> >   return vcpu_bitmap;
> >  }
> >
> > -static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 ingpa, u16 rep_cnt, 
> > bool ex)
> > +struct kvm_hv_hcall {
> > + u64 param;
> > + u64 ingpa;
> > + u64 outgpa;
> > + u16 code;
> > + u16 rep_cnt;
> > + u16 rep_idx;
> > + bool fast;
> > + bool rep;
> > +};
> > +
> > +static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall 
> > *hc, bool ex)
> 
> Nitpick: Would it make sense to also pack the fact that we're dealing
> with a hypercall using ExProcessorMasks into 'struct kvm_hv_hcall' and
> get rid of 'bool ex' parameter for both kvm_hv_flush_tlb() and
> kvm_hv_send_ipi()? 'struct kvm_hv_hcall' is already a synthetic
> aggregator for input and output so adding some other information there
> may not be that big of a stretch...

The other members of the struct are all hypercall parameters (or flags)
while the 'bool ex' is our way of handling ExProcessorMasks within the
same method.

Besides, in kvm_hv_hypercall() passing it as a 3rd argument looks
better than setting 'hc.ex = true' and than immediately calling the
method :-).

> >  {
> >   struct kvm *kvm = vcpu->kvm;
> >   struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> > @@ -1638,7 +1649,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, 
> > u64 ingpa, u16 rep_cnt, bool
> >   bool all_cpus;
> >
> >   if (!ex) {
> > - if (unlikely(kvm_read_guest(kvm, ingpa, , 
> > sizeof(flush
> > + if (unlikely(kvm_read_guest(kvm, hc->ingpa, , 
> > sizeof(flush
> >   return HV_STATUS_INVALID_HYPERCALL_INPUT;
> >
> >   trace_kvm_hv_flush_tlb(flush.processor_mask,
> > @@ -1657,7 +1668,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, 
> > u64 ingpa, u16 rep_cnt, bool
> >   all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
> >   flush.processor_mask == 0;
> >   } else {
> > - if (unlikely(kvm_read_guest(kvm, ingpa, _ex,
> > + if (unlikely(kvm_read_guest(kvm, hc->ingpa, _ex,
> >   sizeof(flush_ex
> >   return HV_STATUS_INVALID_HYPERCALL_INPUT;
> >
> > @@ -1679,8 +1690,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, 
> > u64 ingpa, u16 rep_cnt, bool
> >
> >   if (!all_cpus &&
> >   kvm_read_guest(kvm,
> > -ingpa + offsetof(struct hv_tlb_flush_ex,
> > - hv_vp_set.bank_contents),
> > +hc->ingpa + offsetof(struct 
> > hv_tlb_flush_ex,
> > + 
> > hv_vp_set.bank_contents),
> >  sparse_banks,
> >  sparse_banks_len))
> >   return HV_STATUS_INVALID_HYPERCALL_INPUT;
> > @@ -1700,9 +1711,9 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, 
> > u64 ingpa, u16 rep_cnt, bool
> >   NULL, vcpu_mask, _vcpu->tlb_flush);
> >
> >  ret_success:
> > - /* We always do full TLB flush, set rep_done = rep_cnt. */
> > + /* We always do full TLB flush, set rep_done = hc->rep_cnt. */
> 
> Nitpicking: I'd suggest we word it a bit differently:
> 
> "We always do full TLB flush, set 'Reps completed' = 'Rep Count'."
> 
> so it matches TLFS rather than KVM internals.

Makes sense. Changed.

Thanks for your reviews.

~ Sid.



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





Re: [PATCH v2 1/4] KVM: x86: Move FPU register accessors into fpu.h

2021-04-13 Thread Siddharth Chandrasekaran
On Tue, Apr 13, 2021 at 03:40:56PM +0200, Vitaly Kuznetsov wrote:
> Siddharth Chandrasekaran  writes:
> > @@ -0,0 +1,140 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +
> > +#ifndef __KVM_FPU_H_
> > +#define __KVM_FPU_H_
> > +
> > +#include 
> > +
> > +typedef u32  __attribute__((vector_size(16))) sse128_t;
> 
> Post-patch we seem to have two definitions of 'sse128_t':
> 
> $ git grep sse128_t
> HEAD~3:arch/x86/kvm/fpu.h:typedef u32   
> __attribute__((vector_size(16))) sse128_t;
> HEAD~3:arch/x86/kvm/fpu.h:#define __sse128_uunion { sse128_t vec; u64 
> as_u64[2]; u32 as_u32[4]; }
> HEAD~3:arch/x86/kvm/fpu.h:static inline void _kvm_read_sse_reg(int reg, 
> sse128_t *data)
> HEAD~3:arch/x86/kvm/fpu.h:static inline void _kvm_write_sse_reg(int reg, 
> const sse128_t *data)
> HEAD~3:arch/x86/kvm/fpu.h:static inline void kvm_read_sse_reg(int reg, 
> sse128_t *data)
> HEAD~3:arch/x86/kvm/fpu.h:static inline void kvm_write_sse_reg(int reg, const 
> sse128_t *data)
> HEAD~3:arch/x86/kvm/kvm_emulate.h:typedef u32 
> __attribute__((vector_size(16))) sse128_t;
> HEAD~3:arch/x86/kvm/kvm_emulate.h:  char valptr[sizeof(sse128_t)];
> HEAD~3:arch/x86/kvm/kvm_emulate.h:  sse128_t vec_val;
> 
> Should the one from kvm_emulate.h go away?

Yes, removed.

~ Sid.



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





Re: [PATCH v2 4/4] KVM: hyper-v: Advertise support for fast XMM hypercalls

2021-04-13 Thread Siddharth Chandrasekaran
On Mon, Apr 12, 2021 at 08:14:23PM +, Wei Liu wrote:
> On Mon, Apr 12, 2021 at 07:00:17PM +0200, Siddharth Chandrasekaran wrote:
> > Now that all extant hypercalls that can use XMM registers (based on
> > spec) for input/outputs are patched to support them, we can start
> > advertising this feature to guests.
> >
> > Cc: Alexander Graf 
> > Cc: Evgeny Iakovlev 
> > Signed-off-by: Siddharth Chandrasekaran 
> > ---
> >  arch/x86/include/asm/hyperv-tlfs.h | 7 ++-
> >  arch/x86/kvm/hyperv.c  | 2 ++
> >  2 files changed, 8 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/x86/include/asm/hyperv-tlfs.h 
> > b/arch/x86/include/asm/hyperv-tlfs.h
> > index e6cd3fee562b..716f12be411e 100644
> > --- a/arch/x86/include/asm/hyperv-tlfs.h
> > +++ b/arch/x86/include/asm/hyperv-tlfs.h
> > @@ -52,7 +52,7 @@
> >   * Support for passing hypercall input parameter block via XMM
> >   * registers is available
> >   */
> > -#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLEBIT(4)
> > +#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4)
> >  /* Support for a virtual guest idle state is available */
> >  #define HV_X64_GUEST_IDLE_STATE_AVAILABLEBIT(5)
> >  /* Frequency MSRs available */
> > @@ -61,6 +61,11 @@
> >  #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10)
> >  /* Support for debug MSRs available */
> >  #define HV_FEATURE_DEBUG_MSRS_AVAILABLE  BIT(11)
> > +/*
> > + * Support for returning hypercall ouput block via XMM
> 
> "output"

Fixed. Thanks.

~ Sid.



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





Re: [PATCH v2 3/4] KVM: x86: kvm_hv_flush_tlb use inputs from XMM registers

2021-04-13 Thread Siddharth Chandrasekaran
On Mon, Apr 12, 2021 at 08:13:19PM +, Wei Liu wrote:
> On Mon, Apr 12, 2021 at 07:00:16PM +0200, Siddharth Chandrasekaran wrote:
> > +
> > +static inline void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc)
> 
> Do you really need inline here? The compiler should be smart enough to
> inline this function if necessary.

Removed.

> > +{
> > + int reg;
> > +
> > + kvm_fpu_get();
> > + for (reg = 0; reg < KVM_HV_HYPERCALL_MAX_XMM_REGISTERS; reg++)
> > + _kvm_read_sse_reg(reg, >xmm[reg]);
> > + kvm_fpu_put();
> > + hc->xmm_dirty = false;
> 
> There is no code that sets xmm_dirty to true? What am I missing? I guess
> that's because you haven't implemented the hypercalls that need writing
> back to guest?

Yes, when a hypercall wants to return data via XMM registers, it should
update hc->xmm[] and set hc->xmm_dirty (I plan on using this in a future
patch). The reason why I didn't defer this change to the actual patch
that needs it is that it pairs nicely with the read/write xmm_reg()
calls in kvm_hv_hypercall().

~ Sid.



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





[PATCH v2 3/4] KVM: x86: kvm_hv_flush_tlb use inputs from XMM registers

2021-04-12 Thread Siddharth Chandrasekaran
Hyper-V supports the use of XMM registers to perform fast hypercalls.
This allows guests to take advantage of the improved performance of the
fast hypercall interface even though a hypercall may require more than
(the current maximum of) two input registers.

The XMM fast hypercall interface uses six additional XMM registers (XMM0
to XMM5) to allow the guest to pass an input parameter block of up to
112 bytes. Hyper-V can also return data back to the guest in the
remaining XMM registers that are not used by the current hypercall.

Add framework to read/write to XMM registers in kvm_hv_hypercall() and
use the additional hypercall inputs from XMM registers in
kvm_hv_flush_tlb() when possible.

Cc: Alexander Graf 
Co-developed-by: Evgeny Iakovlev 
Signed-off-by: Evgeny Iakovlev 
Signed-off-by: Siddharth Chandrasekaran 
---
 arch/x86/kvm/hyperv.c | 109 ++
 1 file changed, 90 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 8f6babd1ea0d..1f9959aba70d 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -36,6 +36,7 @@
 
 #include "trace.h"
 #include "irq.h"
+#include "fpu.h"
 
 /* "Hv#1" signature */
 #define HYPERV_CPUID_SIGNATURE_EAX 0x31237648
@@ -1623,6 +1624,8 @@ static __always_inline unsigned long 
*sparse_set_to_vcpu_mask(
return vcpu_bitmap;
 }
 
+#define KVM_HV_HYPERCALL_MAX_XMM_REGISTERS  6
+
 struct kvm_hv_hcall {
u64 param;
u64 ingpa;
@@ -1632,10 +1635,14 @@ struct kvm_hv_hcall {
u16 rep_idx;
bool fast;
bool rep;
+   sse128_t xmm[KVM_HV_HYPERCALL_MAX_XMM_REGISTERS];
+   bool xmm_dirty;
 };
 
 static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, 
bool ex)
 {
+   int i, j;
+   gpa_t gpa;
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
struct hv_tlb_flush_ex flush_ex;
@@ -1649,8 +1656,15 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, 
struct kvm_hv_hcall *hc, bool
bool all_cpus;
 
if (!ex) {
-   if (unlikely(kvm_read_guest(kvm, hc->ingpa, , 
sizeof(flush
-   return HV_STATUS_INVALID_HYPERCALL_INPUT;
+   if (hc->fast) {
+   flush.address_space = hc->ingpa;
+   flush.flags = hc->outgpa;
+   flush.processor_mask = sse128_lo(hc->xmm[0]);
+   } else {
+   if (unlikely(kvm_read_guest(kvm, hc->ingpa,
+   , sizeof(flush
+   return HV_STATUS_INVALID_HYPERCALL_INPUT;
+   }
 
trace_kvm_hv_flush_tlb(flush.processor_mask,
   flush.address_space, flush.flags);
@@ -1668,9 +1682,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, 
struct kvm_hv_hcall *hc, bool
all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
flush.processor_mask == 0;
} else {
-   if (unlikely(kvm_read_guest(kvm, hc->ingpa, _ex,
-   sizeof(flush_ex
-   return HV_STATUS_INVALID_HYPERCALL_INPUT;
+   if (hc->fast) {
+   flush_ex.address_space = hc->ingpa;
+   flush_ex.flags = hc->outgpa;
+   memcpy(_ex.hv_vp_set,
+  >xmm[0], sizeof(hc->xmm[0]));
+   } else {
+   if (unlikely(kvm_read_guest(kvm, hc->ingpa, _ex,
+   sizeof(flush_ex
+   return HV_STATUS_INVALID_HYPERCALL_INPUT;
+   }
 
trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
  flush_ex.hv_vp_set.format,
@@ -1681,20 +1702,29 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, 
struct kvm_hv_hcall *hc, bool
all_cpus = flush_ex.hv_vp_set.format !=
HV_GENERIC_SET_SPARSE_4K;
 
-   sparse_banks_len =
-   bitmap_weight((unsigned long *)_bank_mask, 64) *
-   sizeof(sparse_banks[0]);
+   sparse_banks_len = bitmap_weight((unsigned long 
*)_bank_mask, 64);
 
if (!sparse_banks_len && !all_cpus)
goto ret_success;
 
-   if (!all_cpus &&
-   kvm_read_guest(kvm,
-  hc->ingpa + offsetof(struct hv_tlb_flush_ex,
-   
hv_vp_set.bank_contents),
-  sparse_banks,
-  sparse_banks_len))
-   return

[PATCH v2 4/4] KVM: hyper-v: Advertise support for fast XMM hypercalls

2021-04-12 Thread Siddharth Chandrasekaran
Now that all extant hypercalls that can use XMM registers (based on
spec) for input/outputs are patched to support them, we can start
advertising this feature to guests.

Cc: Alexander Graf 
Cc: Evgeny Iakovlev 
Signed-off-by: Siddharth Chandrasekaran 
---
 arch/x86/include/asm/hyperv-tlfs.h | 7 ++-
 arch/x86/kvm/hyperv.c  | 2 ++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/hyperv-tlfs.h 
b/arch/x86/include/asm/hyperv-tlfs.h
index e6cd3fee562b..716f12be411e 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -52,7 +52,7 @@
  * Support for passing hypercall input parameter block via XMM
  * registers is available
  */
-#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE  BIT(4)
+#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE   BIT(4)
 /* Support for a virtual guest idle state is available */
 #define HV_X64_GUEST_IDLE_STATE_AVAILABLE  BIT(5)
 /* Frequency MSRs available */
@@ -61,6 +61,11 @@
 #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE   BIT(10)
 /* Support for debug MSRs available */
 #define HV_FEATURE_DEBUG_MSRS_AVAILABLEBIT(11)
+/*
+ * Support for returning hypercall output block via XMM
+ * registers is available
+ */
+#define HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE  BIT(15)
 /* stimer Direct Mode is available */
 #define HV_STIMER_DIRECT_MODE_AVAILABLEBIT(19)
 
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 1f9959aba70d..55838c266bcd 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2254,6 +2254,8 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct 
kvm_cpuid2 *cpuid,
ent->ebx |= HV_POST_MESSAGES;
ent->ebx |= HV_SIGNAL_EVENTS;
 
+   ent->edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
+   ent->edx |= HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE;
ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
 
-- 
2.17.1




Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





[PATCH v2 0/4] Add support for XMM fast hypercalls

2021-04-12 Thread Siddharth Chandrasekaran
Hyper-V supports the use of XMM registers to perform fast hypercalls.
This allows guests to take advantage of the improved performance of the
fast hypercall interface even though a hypercall may require more than
(the current maximum of) two general purpose registers.

The XMM fast hypercall interface uses an additional six XMM registers
(XMM0 to XMM5) to allow the caller to pass an input parameter block of
up to 112 bytes. Hyper-V can also return data back to the guest in the
remaining XMM registers that are not used by the current hypercall.

Although the Hyper-v TLFS mentions that a guest cannot use this feature
unless the hypervisor advertises support for it, some hypercalls which
we plan on upstreaming in the future use them anyway. This patchset adds
necessary infrastructure for handling input/output via XMM registers and
patches kvm_hv_flush_tlb() to use xmm input arguments.

~ Sid.

v2:
- Add hc.fast to is_xmm_fast_hypercall() check
- Split CPUID feature bits for input and output

Siddharth Chandrasekaran (4):
  KVM: x86: Move FPU register accessors into fpu.h
  KVM: hyper-v: Collect hypercall params into struct
  KVM: x86: kvm_hv_flush_tlb use inputs from XMM registers
  KVM: hyper-v: Advertise support for fast XMM hypercalls

 arch/x86/include/asm/hyperv-tlfs.h |   7 +-
 arch/x86/kvm/emulate.c | 138 +++-
 arch/x86/kvm/fpu.h | 140 +
 arch/x86/kvm/hyperv.c  | 242 +++--
 4 files changed, 327 insertions(+), 200 deletions(-)
 create mode 100644 arch/x86/kvm/fpu.h

-- 
2.17.1



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





[PATCH v2 2/4] KVM: hyper-v: Collect hypercall params into struct

2021-04-12 Thread Siddharth Chandrasekaran
As of now there are 7 parameters (and flags) that are used in various
hyper-v hypercall handlers. There are 6 more input/output parameters
passed from XMM registers which are to be added in an upcoming patch.

To make passing arguments to the handlers more readable, capture all
these parameters into a single structure.

Cc: Alexander Graf 
Cc: Evgeny Iakovlev 
Signed-off-by: Siddharth Chandrasekaran 
---
 arch/x86/kvm/hyperv.c | 147 +++---
 1 file changed, 79 insertions(+), 68 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index f98370a39936..8f6babd1ea0d 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1623,7 +1623,18 @@ static __always_inline unsigned long 
*sparse_set_to_vcpu_mask(
return vcpu_bitmap;
 }
 
-static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 ingpa, u16 rep_cnt, 
bool ex)
+struct kvm_hv_hcall {
+   u64 param;
+   u64 ingpa;
+   u64 outgpa;
+   u16 code;
+   u16 rep_cnt;
+   u16 rep_idx;
+   bool fast;
+   bool rep;
+};
+
+static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, 
bool ex)
 {
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
@@ -1638,7 +1649,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 
ingpa, u16 rep_cnt, bool
bool all_cpus;
 
if (!ex) {
-   if (unlikely(kvm_read_guest(kvm, ingpa, , sizeof(flush
+   if (unlikely(kvm_read_guest(kvm, hc->ingpa, , 
sizeof(flush
return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
trace_kvm_hv_flush_tlb(flush.processor_mask,
@@ -1657,7 +1668,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 
ingpa, u16 rep_cnt, bool
all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
flush.processor_mask == 0;
} else {
-   if (unlikely(kvm_read_guest(kvm, ingpa, _ex,
+   if (unlikely(kvm_read_guest(kvm, hc->ingpa, _ex,
sizeof(flush_ex
return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
@@ -1679,8 +1690,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 
ingpa, u16 rep_cnt, bool
 
if (!all_cpus &&
kvm_read_guest(kvm,
-  ingpa + offsetof(struct hv_tlb_flush_ex,
-   hv_vp_set.bank_contents),
+  hc->ingpa + offsetof(struct hv_tlb_flush_ex,
+   
hv_vp_set.bank_contents),
   sparse_banks,
   sparse_banks_len))
return HV_STATUS_INVALID_HYPERCALL_INPUT;
@@ -1700,9 +1711,9 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 
ingpa, u16 rep_cnt, bool
NULL, vcpu_mask, _vcpu->tlb_flush);
 
 ret_success:
-   /* We always do full TLB flush, set rep_done = rep_cnt. */
+   /* We always do full TLB flush, set rep_done = hc->rep_cnt. */
return (u64)HV_STATUS_SUCCESS |
-   ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
+   ((u64)hc->rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
 }
 
 static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
@@ -1724,8 +1735,7 @@ static void kvm_send_ipi_to_many(struct kvm *kvm, u32 
vector,
}
 }
 
-static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, u64 ingpa, u64 outgpa,
-  bool ex, bool fast)
+static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, 
bool ex)
 {
struct kvm *kvm = vcpu->kvm;
struct hv_send_ipi_ex send_ipi_ex;
@@ -1740,25 +1750,25 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, u64 
ingpa, u64 outgpa,
bool all_cpus;
 
if (!ex) {
-   if (!fast) {
-   if (unlikely(kvm_read_guest(kvm, ingpa, _ipi,
+   if (!hc->fast) {
+   if (unlikely(kvm_read_guest(kvm, hc->ingpa, _ipi,
sizeof(send_ipi
return HV_STATUS_INVALID_HYPERCALL_INPUT;
sparse_banks[0] = send_ipi.cpu_mask;
vector = send_ipi.vector;
} else {
/* 'reserved' part of hv_send_ipi should be 0 */
-   if (unlikely(ingpa >> 32 != 0))
+   if (unlikely(hc->ingpa >> 32 != 0))
return HV_STATUS_INVALID_HYPERCALL_INPUT;
-   sparse_banks[0] = outgpa;
-   vector = (u32)ingpa;
+   sparse_banks[0] = hc->outgpa;
+   vector = (u32)hc->ingpa;
}

[PATCH v2 1/4] KVM: x86: Move FPU register accessors into fpu.h

2021-04-12 Thread Siddharth Chandrasekaran
Hyper-v XMM fast hypercalls use XMM registers to pass input/output
parameters. To access these, hyperv.c can reuse some FPU register
accessors defined in emulator.c. Move them to a common location so both
can access them.

While at it, reorder the parameters of these accessor methods to make
them more readable.

Cc: Alexander Graf 
Cc: Evgeny Iakovlev 
Signed-off-by: Siddharth Chandrasekaran 
---
 arch/x86/kvm/emulate.c | 138 ++--
 arch/x86/kvm/fpu.h | 140 +
 2 files changed, 158 insertions(+), 120 deletions(-)
 create mode 100644 arch/x86/kvm/fpu.h

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index f7970ba6219f..296f8f3ce988 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -22,7 +22,6 @@
 #include "kvm_cache_regs.h"
 #include "kvm_emulate.h"
 #include 
-#include 
 #include 
 #include 
 
@@ -30,6 +29,7 @@
 #include "tss.h"
 #include "mmu.h"
 #include "pmu.h"
+#include "fpu.h"
 
 /*
  * Operand types
@@ -1081,116 +1081,14 @@ static void fetch_register_operand(struct operand *op)
}
 }
 
-static void emulator_get_fpu(void)
-{
-   fpregs_lock();
-
-   fpregs_assert_state_consistent();
-   if (test_thread_flag(TIF_NEED_FPU_LOAD))
-   switch_fpu_return();
-}
-
-static void emulator_put_fpu(void)
-{
-   fpregs_unlock();
-}
-
-static void read_sse_reg(sse128_t *data, int reg)
-{
-   emulator_get_fpu();
-   switch (reg) {
-   case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
-   case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
-   case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
-   case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
-   case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
-   case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
-   case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
-   case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
-#ifdef CONFIG_X86_64
-   case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
-   case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
-   case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
-   case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
-   case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
-   case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
-   case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
-   case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
-#endif
-   default: BUG();
-   }
-   emulator_put_fpu();
-}
-
-static void write_sse_reg(sse128_t *data, int reg)
-{
-   emulator_get_fpu();
-   switch (reg) {
-   case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
-   case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
-   case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
-   case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
-   case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
-   case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
-   case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
-   case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
-#ifdef CONFIG_X86_64
-   case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
-   case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
-   case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
-   case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
-   case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
-   case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
-   case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
-   case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
-#endif
-   default: BUG();
-   }
-   emulator_put_fpu();
-}
-
-static void read_mmx_reg(u64 *data, int reg)
-{
-   emulator_get_fpu();
-   switch (reg) {
-   case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
-   case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
-   case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
-   case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
-   case 4: asm(&q

Re: [PATCH 4/4] KVM: hyper-v: Advertise support for fast XMM hypercalls

2021-04-12 Thread Siddharth Chandrasekaran
On Thu, Apr 08, 2021 at 04:44:23PM +0200, Vitaly Kuznetsov wrote:
> Siddharth Chandrasekaran  writes:
> > On Thu, Apr 08, 2021 at 02:05:53PM +0200, Vitaly Kuznetsov wrote:
> >> Siddharth Chandrasekaran  writes:
> >> > Now that all extant hypercalls that can use XMM registers (based on
> >> > spec) for input/outputs are patched to support them, we can start
> >> > advertising this feature to guests.
> >> >
> >> > Cc: Alexander Graf 
> >> > Cc: Evgeny Iakovlev 
> >> > Signed-off-by: Siddharth Chandrasekaran 
> >> > ---
> >> >  arch/x86/include/asm/hyperv-tlfs.h | 4 ++--
> >> >  arch/x86/kvm/hyperv.c  | 1 +
> >> >  2 files changed, 3 insertions(+), 2 deletions(-)
> >> >
> >> > diff --git a/arch/x86/include/asm/hyperv-tlfs.h 
> >> > b/arch/x86/include/asm/hyperv-tlfs.h
> >> > index e6cd3fee562b..1f160ef60509 100644
> >> > --- a/arch/x86/include/asm/hyperv-tlfs.h
> >> > +++ b/arch/x86/include/asm/hyperv-tlfs.h
> >> > @@ -49,10 +49,10 @@
> >> >  /* Support for physical CPU dynamic partitioning events is available*/
> >> >  #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLEBIT(3)
> >> >  /*
> >> > - * Support for passing hypercall input parameter block via XMM
> >> > + * Support for passing hypercall input and output parameter block via 
> >> > XMM
> >> >   * registers is available
> >> >   */
> >> > -#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLEBIT(4)
> >> > +#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLEBIT(4) | 
> >> > BIT(15)
> >>
> >> TLFS 6.0b states that there are two distinct bits for input and output:
> >>
> >> CPUID Leaf 0x4003.EDX:
> >> Bit 4: support for passing hypercall input via XMM registers is available.
> >> Bit 15: support for returning hypercall output via XMM registers is 
> >> available.
> >>
> >> and HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE is not currently used
> >> anywhere, I'd suggest we just rename
> >>
> >> HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE to 
> >> HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE
> >> and add HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE (bit 15).
> >
> > That is how I had it initially; but then noticed that we would never
> > need to use either of them separately. So it seemed like a reasonable
> > abstraction to put them together.
> >
> 
> Actually, we may. In theory, KVM userspace may decide to expose just
> one of these two to the guest as it is not obliged to copy everything
> from KVM_GET_SUPPORTED_HV_CPUID so we will need separate
> guest_cpuid_has() checks.

Looks like guest_cpuid_has() check is for x86 CPU features only (if I'm
not mistaken) and I don't see a suitable alternative that looks into
vcpu->arch.cpuid_entries[]. So I plan to add a new method
hv_guest_cpuid_has() in hyperv.c to have this check; does that sound
right to you?

If you can give a quick go-ahead, I'll make the changes requested so
far and send v2 this series.

Thanks.

~ Sid.



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





Re: [PATCH 0/4] Add support for XMM fast hypercalls

2021-04-09 Thread Siddharth Chandrasekaran
On Fri, Apr 09, 2021 at 09:42:41AM +0200, Vitaly Kuznetsov wrote:
> Siddharth Chandrasekaran  writes:
> > On Thu, Apr 08, 2021 at 04:30:18PM +, Wei Liu wrote:
> >> On Thu, Apr 08, 2021 at 05:54:43PM +0200, Siddharth Chandrasekaran wrote:
> >> > On Thu, Apr 08, 2021 at 05:48:19PM +0200, Paolo Bonzini wrote:
> >> > > On 08/04/21 17:40, Siddharth Chandrasekaran wrote:
> >> > > > > > > Although the Hyper-v TLFS mentions that a guest cannot use 
> >> > > > > > > this feature
> >> > > > > > > unless the hypervisor advertises support for it, some 
> >> > > > > > > hypercalls which
> >> > > > > > > we plan on upstreaming in future uses them anyway.
> >> > > > > > No, please don't do this. Check the feature bit(s) before you 
> >> > > > > > issue
> >> > > > > > hypercalls which rely on the extended interface.
> >> > > > > Perhaps Siddharth should clarify this, but I read it as Hyper-V 
> >> > > > > being
> >> > > > > buggy and using XMM arguments unconditionally.
> >> > > > The guest is at fault here as it expects Hyper-V to consume arguments
> >> > > > from XMM registers for certain hypercalls (that we are working) even 
> >> > > > if
> >> > > > we didn't expose the feature via CPUID bits.
> >> > >
> >> > > What guest is that?
> >> >
> >> > It is a Windows Server 2016.
> >>
> >> Can you be more specific? Are you implementing some hypercalls from
> >> TLFS? If so, which ones?
> >
> > Yes all of them are from TLFS. We are implementing VSM and there are a
> > bunch of hypercalls that we have implemented to manage VTL switches,
> > memory protection and virtual interrupts.
> 
> Wow, sounds awesome! Do you plan to upstream this work?

Yes, that is the plan :-)

> > The following 3 hypercalls that use the XMM fast hypercalls are relevant
> > to this patch set:
> >
> > HvCallModifyVtlProtectionMask
> > HvGetVpRegisters
> > HvSetVpRegisters
> 
> It seems AccessVSM and AccessVpRegisters privilges have implicit
> dependency on XMM input/output. This will need to be enforced in KVM
> userspace.

Noted.

~ Sid.



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





Re: [PATCH 4/4] KVM: hyper-v: Advertise support for fast XMM hypercalls

2021-04-09 Thread Siddharth Chandrasekaran
On Fri, Apr 09, 2021 at 09:38:03AM +0200, Vitaly Kuznetsov wrote:
> Siddharth Chandrasekaran  writes:
> > On Thu, Apr 08, 2021 at 04:44:23PM +0200, Vitaly Kuznetsov wrote:
> >> Siddharth Chandrasekaran  writes:
> >> > On Thu, Apr 08, 2021 at 02:05:53PM +0200, Vitaly Kuznetsov wrote:
> >> >> Siddharth Chandrasekaran  writes:
> >> >>
> >> >> > Now that all extant hypercalls that can use XMM registers (based on
> >> >> > spec) for input/outputs are patched to support them, we can start
> >> >> > advertising this feature to guests.
> >> >> >
> >> >> > Cc: Alexander Graf 
> >> >> > Cc: Evgeny Iakovlev 
> >> >> > Signed-off-by: Siddharth Chandrasekaran 
> >> >> > ---
> >> >> >  arch/x86/include/asm/hyperv-tlfs.h | 4 ++--
> >> >> >  arch/x86/kvm/hyperv.c  | 1 +
> >> >> >  2 files changed, 3 insertions(+), 2 deletions(-)
> >> >> >
> >> >> > diff --git a/arch/x86/include/asm/hyperv-tlfs.h 
> >> >> > b/arch/x86/include/asm/hyperv-tlfs.h
> >> >> > index e6cd3fee562b..1f160ef60509 100644
> >> >> > --- a/arch/x86/include/asm/hyperv-tlfs.h
> >> >> > +++ b/arch/x86/include/asm/hyperv-tlfs.h
> >> >> > @@ -49,10 +49,10 @@
> >> >> >  /* Support for physical CPU dynamic partitioning events is 
> >> >> > available*/
> >> >> >  #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLEBIT(3)
> >> >> >  /*
> >> >> > - * Support for passing hypercall input parameter block via XMM
> >> >> > + * Support for passing hypercall input and output parameter block 
> >> >> > via XMM
> >> >> >   * registers is available
> >> >> >   */
> >> >> > -#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLEBIT(4)
> >> >> > +#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLEBIT(4) 
> >> >> > | BIT(15)
> >> >>
> >> >> TLFS 6.0b states that there are two distinct bits for input and output:
> >> >>
> >> >> CPUID Leaf 0x4003.EDX:
> >> >> Bit 4: support for passing hypercall input via XMM registers is 
> >> >> available.
> >> >> Bit 15: support for returning hypercall output via XMM registers is 
> >> >> available.
> >> >>
> >> >> and HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE is not currently used
> >> >> anywhere, I'd suggest we just rename
> >> >>
> >> >> HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE to 
> >> >> HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE
> >> >> and add HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE (bit 15).
> >> >
> >> > That is how I had it initially; but then noticed that we would never
> >> > need to use either of them separately. So it seemed like a reasonable
> >> > abstraction to put them together.
> >> >
> >>
> >> Actually, we may. In theory, KVM userspace may decide to expose just
> >> one of these two to the guest as it is not obliged to copy everything
> >> from KVM_GET_SUPPORTED_HV_CPUID so we will need separate
> >> guest_cpuid_has() checks.
> >
> > Makes sense. I'll split them and add the checks.
> >
> >> (This reminds me of something I didn't see in your series:
> >> we need to check that XMM hypercall parameters support was actually
> >> exposed to the guest as it is illegal for a guest to use it otherwise --
> >> and we will likely need two checks, for input and output).
> >
> > We observed that Windows expects Hyper-V to support XMM params even if
> > we don't advertise this feature but if userspace wants to hide this
> > feature and the guest does it anyway, then it makes sense to treat it as
> > an illegal OP.
> >
> 
> Out of pure curiosity, which Windows version behaves like that? And how
> does this work with KVM without your patches?

The guest is a Windows Server 2016 on which we are trying to enable VBS
and handle it through the VSM API. When VBS is enabled on the guest, it
starts using many other (new) hypercalls and some of them don't honor
the CPUID bits (4, 15) that indicate the presence of XMM params support.

> Sane KVM userspaces will certainly expose both XMM input and output
> capabilities together but having an ability to hide one or both of them
> may come handy while debugging.
> 
> Also, we weren't enforcing the rule that enlightenments not exposed to
> the guest don't work, even the whole Hyper-V emulation interface was
> available to all guests who were smart enough to know how to enable it!
> I don't like this for two reasons: security (large attack surface) and
> the fact that someone 'smart' may decide to use Hyper-V emulation
> features on KVM as 'general purpose' features saying 'they're always
> available anyway', this risks becoming an ABI.
> 
> Let's at least properly check if the feature was exposed to the guest
> for all new enlightenments.

Agreed.

~ Sid.



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





Re: [PATCH 0/4] Add support for XMM fast hypercalls

2021-04-08 Thread Siddharth Chandrasekaran
On Thu, Apr 08, 2021 at 04:30:18PM +, Wei Liu wrote:
> On Thu, Apr 08, 2021 at 05:54:43PM +0200, Siddharth Chandrasekaran wrote:
> > On Thu, Apr 08, 2021 at 05:48:19PM +0200, Paolo Bonzini wrote:
> > > On 08/04/21 17:40, Siddharth Chandrasekaran wrote:
> > > > > > > Although the Hyper-v TLFS mentions that a guest cannot use this 
> > > > > > > feature
> > > > > > > unless the hypervisor advertises support for it, some hypercalls 
> > > > > > > which
> > > > > > > we plan on upstreaming in future uses them anyway.
> > > > > > No, please don't do this. Check the feature bit(s) before you issue
> > > > > > hypercalls which rely on the extended interface.
> > > > > Perhaps Siddharth should clarify this, but I read it as Hyper-V being
> > > > > buggy and using XMM arguments unconditionally.
> > > > The guest is at fault here as it expects Hyper-V to consume arguments
> > > > from XMM registers for certain hypercalls (that we are working) even if
> > > > we didn't expose the feature via CPUID bits.
> > >
> > > What guest is that?
> >
> > It is a Windows Server 2016.
> 
> Can you be more specific? Are you implementing some hypercalls from
> TLFS? If so, which ones?

Yes all of them are from TLFS. We are implementing VSM and there are a
bunch of hypercalls that we have implemented to manage VTL switches,
memory protection and virtual interrupts.

The following 3 hypercalls that use the XMM fast hypercalls are relevant
to this patch set:

HvCallModifyVtlProtectionMask
HvGetVpRegisters 
HvSetVpRegisters 

~ Sid.



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





Re: [PATCH 0/4] Add support for XMM fast hypercalls

2021-04-08 Thread Siddharth Chandrasekaran
On Thu, Apr 08, 2021 at 03:38:13PM +, Wei Liu wrote:
> On Thu, Apr 08, 2021 at 05:30:26PM +0200, Paolo Bonzini wrote:
> > On 08/04/21 17:28, Wei Liu wrote:
> > > > Although the Hyper-v TLFS mentions that a guest cannot use this feature
> > > > unless the hypervisor advertises support for it, some hypercalls which
> > > > we plan on upstreaming in future uses them anyway.
> > >
> > > No, please don't do this. Check the feature bit(s) before you issue
> > > hypercalls which rely on the extended interface.
> >
> > Perhaps Siddharth should clarify this, but I read it as Hyper-V being buggy
> > and using XMM arguments unconditionally.
> >
> 
> There is no code in upstream Linux that uses the XMM fast hypercall
> interface at the moment.
> 
> If there is such code, it has bugs in it and should be fixed. :-)

None of the existing hypercalls are buggy. These are some hypercalls we
are working on (and planning on upstreaming in the near future). 

~ Sid.



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





Re: [PATCH 4/4] KVM: hyper-v: Advertise support for fast XMM hypercalls

2021-04-08 Thread Siddharth Chandrasekaran
On Thu, Apr 08, 2021 at 03:44:46PM +, Wei Liu wrote:
> CAUTION: This email originated from outside of the organization. Do not click 
> links or open attachments unless you can confirm the sender and know the 
> content is safe.
>
>
>
> On Thu, Apr 08, 2021 at 04:20:54PM +0200, Siddharth Chandrasekaran wrote:
> > On Thu, Apr 08, 2021 at 02:05:53PM +0200, Vitaly Kuznetsov wrote:
> > > Siddharth Chandrasekaran  writes:
> > >
> > > > Now that all extant hypercalls that can use XMM registers (based on
> > > > spec) for input/outputs are patched to support them, we can start
> > > > advertising this feature to guests.
> > > >
> > > > Cc: Alexander Graf 
> > > > Cc: Evgeny Iakovlev 
> > > > Signed-off-by: Siddharth Chandrasekaran 
> > > > ---
> > > >  arch/x86/include/asm/hyperv-tlfs.h | 4 ++--
> > > >  arch/x86/kvm/hyperv.c  | 1 +
> > > >  2 files changed, 3 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/arch/x86/include/asm/hyperv-tlfs.h 
> > > > b/arch/x86/include/asm/hyperv-tlfs.h
> > > > index e6cd3fee562b..1f160ef60509 100644
> > > > --- a/arch/x86/include/asm/hyperv-tlfs.h
> > > > +++ b/arch/x86/include/asm/hyperv-tlfs.h
> > > > @@ -49,10 +49,10 @@
> > > >  /* Support for physical CPU dynamic partitioning events is available*/
> > > >  #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLEBIT(3)
> > > >  /*
> > > > - * Support for passing hypercall input parameter block via XMM
> > > > + * Support for passing hypercall input and output parameter block via 
> > > > XMM
> > > >   * registers is available
> > > >   */
> > > > -#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLEBIT(4)
> > > > +#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLEBIT(4) | 
> > > > BIT(15)
> > >
> > > TLFS 6.0b states that there are two distinct bits for input and output:
> > >
> > > CPUID Leaf 0x4003.EDX:
> > > Bit 4: support for passing hypercall input via XMM registers is available.
> > > Bit 15: support for returning hypercall output via XMM registers is 
> > > available.
> > >
> > > and HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE is not currently used
> > > anywhere, I'd suggest we just rename
> > >
> > > HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE to 
> > > HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE
> > > and add HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE (bit 15).
> >
> > That is how I had it initially; but then noticed that we would never
> > need to use either of them separately. So it seemed like a reasonable
> > abstraction to put them together.
> >
>
> They are two separate things in TLFS. Please use two macros here.

Ack, will split them.

~ Sid.



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





Re: [PATCH 0/4] Add support for XMM fast hypercalls

2021-04-08 Thread Siddharth Chandrasekaran
On Thu, Apr 08, 2021 at 05:48:19PM +0200, Paolo Bonzini wrote:
> On 08/04/21 17:40, Siddharth Chandrasekaran wrote:
> > > > > Although the Hyper-v TLFS mentions that a guest cannot use this 
> > > > > feature
> > > > > unless the hypervisor advertises support for it, some hypercalls which
> > > > > we plan on upstreaming in future uses them anyway.
> > > > No, please don't do this. Check the feature bit(s) before you issue
> > > > hypercalls which rely on the extended interface.
> > > Perhaps Siddharth should clarify this, but I read it as Hyper-V being
> > > buggy and using XMM arguments unconditionally.
> > The guest is at fault here as it expects Hyper-V to consume arguments
> > from XMM registers for certain hypercalls (that we are working) even if
> > we didn't expose the feature via CPUID bits.
>
> What guest is that?

It is a Windows Server 2016.



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





Re: [PATCH 4/4] KVM: hyper-v: Advertise support for fast XMM hypercalls

2021-04-08 Thread Siddharth Chandrasekaran
On Thu, Apr 08, 2021 at 04:44:23PM +0200, Vitaly Kuznetsov wrote:
> CAUTION: This email originated from outside of the organization. Do not click 
> links or open attachments unless you can confirm the sender and know the 
> content is safe.
>
>
>
> Siddharth Chandrasekaran  writes:
>
> > On Thu, Apr 08, 2021 at 02:05:53PM +0200, Vitaly Kuznetsov wrote:
> >> Siddharth Chandrasekaran  writes:
> >>
> >> > Now that all extant hypercalls that can use XMM registers (based on
> >> > spec) for input/outputs are patched to support them, we can start
> >> > advertising this feature to guests.
> >> >
> >> > Cc: Alexander Graf 
> >> > Cc: Evgeny Iakovlev 
> >> > Signed-off-by: Siddharth Chandrasekaran 
> >> > ---
> >> >  arch/x86/include/asm/hyperv-tlfs.h | 4 ++--
> >> >  arch/x86/kvm/hyperv.c  | 1 +
> >> >  2 files changed, 3 insertions(+), 2 deletions(-)
> >> >
> >> > diff --git a/arch/x86/include/asm/hyperv-tlfs.h 
> >> > b/arch/x86/include/asm/hyperv-tlfs.h
> >> > index e6cd3fee562b..1f160ef60509 100644
> >> > --- a/arch/x86/include/asm/hyperv-tlfs.h
> >> > +++ b/arch/x86/include/asm/hyperv-tlfs.h
> >> > @@ -49,10 +49,10 @@
> >> >  /* Support for physical CPU dynamic partitioning events is available*/
> >> >  #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLEBIT(3)
> >> >  /*
> >> > - * Support for passing hypercall input parameter block via XMM
> >> > + * Support for passing hypercall input and output parameter block via 
> >> > XMM
> >> >   * registers is available
> >> >   */
> >> > -#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLEBIT(4)
> >> > +#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLEBIT(4) | 
> >> > BIT(15)
> >>
> >> TLFS 6.0b states that there are two distinct bits for input and output:
> >>
> >> CPUID Leaf 0x4003.EDX:
> >> Bit 4: support for passing hypercall input via XMM registers is available.
> >> Bit 15: support for returning hypercall output via XMM registers is 
> >> available.
> >>
> >> and HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE is not currently used
> >> anywhere, I'd suggest we just rename
> >>
> >> HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE to 
> >> HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE
> >> and add HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE (bit 15).
> >
> > That is how I had it initially; but then noticed that we would never
> > need to use either of them separately. So it seemed like a reasonable
> > abstraction to put them together.
> >
>
> Actually, we may. In theory, KVM userspace may decide to expose just
> one of these two to the guest as it is not obliged to copy everything
> from KVM_GET_SUPPORTED_HV_CPUID so we will need separate
> guest_cpuid_has() checks.

Makes sense. I'll split them and add the checks.

> (This reminds me of something I didn't see in your series:
> we need to check that XMM hypercall parameters support was actually
> exposed to the guest as it is illegal for a guest to use it otherwise --
> and we will likely need two checks, for input and output).

We observed that Windows expects Hyper-V to support XMM params even if
we don't advertise this feature but if userspace wants to hide this
feature and the guest does it anyway, then it makes sense to treat it as
an illegal OP.

> Also, (and that's what triggered my comment) all other HV_ACCESS_* in
> kvm_get_hv_cpuid() are single bits so my first impression was that you
> forgot one bit, but then I saw that you combined them together.

~ Sid.



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





Re: [PATCH 0/4] Add support for XMM fast hypercalls

2021-04-08 Thread Siddharth Chandrasekaran
On Thu, Apr 08, 2021 at 05:30:26PM +0200, Paolo Bonzini wrote:
> CAUTION: This email originated from outside of the organization. Do not click 
> links or open attachments unless you can confirm the sender and know the 
> content is safe.
>
>
>
> On 08/04/21 17:28, Wei Liu wrote:
> > > Although the Hyper-v TLFS mentions that a guest cannot use this feature
> > > unless the hypervisor advertises support for it, some hypercalls which
> > > we plan on upstreaming in future uses them anyway.
> >
> > No, please don't do this. Check the feature bit(s) before you issue
> > hypercalls which rely on the extended interface.
>
> Perhaps Siddharth should clarify this, but I read it as Hyper-V being
> buggy and using XMM arguments unconditionally.

The guest is at fault here as it expects Hyper-V to consume arguments
from XMM registers for certain hypercalls (that we are working on) even
if we didn't expose the feature via CPUID bits.

~ Sid.



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





Re: [PATCH 4/4] KVM: hyper-v: Advertise support for fast XMM hypercalls

2021-04-08 Thread Siddharth Chandrasekaran
On Thu, Apr 08, 2021 at 02:05:53PM +0200, Vitaly Kuznetsov wrote:
> Siddharth Chandrasekaran  writes:
>
> > Now that all extant hypercalls that can use XMM registers (based on
> > spec) for input/outputs are patched to support them, we can start
> > advertising this feature to guests.
> >
> > Cc: Alexander Graf 
> > Cc: Evgeny Iakovlev 
> > Signed-off-by: Siddharth Chandrasekaran 
> > ---
> >  arch/x86/include/asm/hyperv-tlfs.h | 4 ++--
> >  arch/x86/kvm/hyperv.c  | 1 +
> >  2 files changed, 3 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/hyperv-tlfs.h 
> > b/arch/x86/include/asm/hyperv-tlfs.h
> > index e6cd3fee562b..1f160ef60509 100644
> > --- a/arch/x86/include/asm/hyperv-tlfs.h
> > +++ b/arch/x86/include/asm/hyperv-tlfs.h
> > @@ -49,10 +49,10 @@
> >  /* Support for physical CPU dynamic partitioning events is available*/
> >  #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLEBIT(3)
> >  /*
> > - * Support for passing hypercall input parameter block via XMM
> > + * Support for passing hypercall input and output parameter block via XMM
> >   * registers is available
> >   */
> > -#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLEBIT(4)
> > +#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLEBIT(4) | 
> > BIT(15)
>
> TLFS 6.0b states that there are two distinct bits for input and output:
>
> CPUID Leaf 0x4003.EDX:
> Bit 4: support for passing hypercall input via XMM registers is available.
> Bit 15: support for returning hypercall output via XMM registers is available.
>
> and HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE is not currently used
> anywhere, I'd suggest we just rename
>
> HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE to HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE
> and add HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE (bit 15).

That is how I had it initially; but then noticed that we would never
need to use either of them separately. So it seemed like a reasonable
abstraction to put them together.

~ Sid.



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





[PATCH 0/4] Add support for XMM fast hypercalls

2021-04-07 Thread Siddharth Chandrasekaran
Hyper-V supports the use of XMM registers to perform fast hypercalls.
This allows guests to take advantage of the improved performance of the
fast hypercall interface even though a hypercall may require more than
(the current maximum of) two general purpose registers.

The XMM fast hypercall interface uses an additional six XMM registers
(XMM0 to XMM5) to allow the caller to pass an input parameter block of
up to 112 bytes. Hyper-V can also return data back to the guest in the
remaining XMM registers that are not used by the current hypercall.

Although the Hyper-v TLFS mentions that a guest cannot use this feature
unless the hypervisor advertises support for it, some hypercalls which
we plan on upstreaming in the future use them anyway. This patchset adds
the necessary infrastructure for handling input/output via XMM registers
and patches kvm_hv_flush_tlb() to use XMM input arguments.

~ Sid.

Siddharth Chandrasekaran (4):
  KVM: x86: Move FPU register accessors into fpu.h
  KVM: hyper-v: Collect hypercall params into struct
  KVM: x86: kvm_hv_flush_tlb use inputs from XMM registers
  KVM: hyper-v: Advertise support for fast XMM hypercalls

 arch/x86/include/asm/hyperv-tlfs.h |   4 +-
 arch/x86/kvm/emulate.c | 138 +++--
 arch/x86/kvm/fpu.h | 140 +
 arch/x86/kvm/hyperv.c  | 241 +++--
 4 files changed, 322 insertions(+), 201 deletions(-)
 create mode 100644 arch/x86/kvm/fpu.h

-- 
2.17.1




Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





[PATCH 4/4] KVM: hyper-v: Advertise support for fast XMM hypercalls

2021-04-07 Thread Siddharth Chandrasekaran
Now that all extant hypercalls that can use XMM registers (based on
spec) for input/outputs are patched to support them, we can start
advertising this feature to guests.

Cc: Alexander Graf 
Cc: Evgeny Iakovlev 
Signed-off-by: Siddharth Chandrasekaran 
---
 arch/x86/include/asm/hyperv-tlfs.h | 4 ++--
 arch/x86/kvm/hyperv.c  | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/hyperv-tlfs.h 
b/arch/x86/include/asm/hyperv-tlfs.h
index e6cd3fee562b..1f160ef60509 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -49,10 +49,10 @@
 /* Support for physical CPU dynamic partitioning events is available*/
 #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE  BIT(3)
 /*
- * Support for passing hypercall input parameter block via XMM
+ * Support for passing hypercall input and output parameter block via XMM
  * registers is available
  */
-#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE  BIT(4)
+#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE  BIT(4) | BIT(15)
 /* Support for a virtual guest idle state is available */
 #define HV_X64_GUEST_IDLE_STATE_AVAILABLE  BIT(5)
 /* Frequency MSRs available */
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index bf2f86f263f1..dd462c1d641d 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2254,6 +2254,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct 
kvm_cpuid2 *cpuid,
ent->ebx |= HV_POST_MESSAGES;
ent->ebx |= HV_SIGNAL_EVENTS;
 
+   ent->edx |= HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE;
ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
 
-- 
2.17.1




Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





[PATCH 3/4] KVM: x86: kvm_hv_flush_tlb use inputs from XMM registers

2021-04-07 Thread Siddharth Chandrasekaran
Hyper-V supports the use of XMM registers to perform fast hypercalls.
This allows guests to take advantage of the improved performance of the
fast hypercall interface even though a hypercall may require more than
(the current maximum of) two input registers.

The XMM fast hypercall interface uses six additional XMM registers (XMM0
to XMM5) to allow the guest to pass an input parameter block of up to
112 bytes. Hyper-V can also return data back to the guest in the
remaining XMM registers that are not used by the current hypercall.

Add framework to read/write to XMM registers in kvm_hv_hypercall() and
use the additional hypercall inputs from XMM registers in
kvm_hv_flush_tlb() when possible.

Cc: Alexander Graf 
Co-developed-by: Evgeny Iakovlev 
Signed-off-by: Evgeny Iakovlev 
Signed-off-by: Siddharth Chandrasekaran 
---
 arch/x86/kvm/hyperv.c | 109 ++
 1 file changed, 90 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 8f6babd1ea0d..bf2f86f263f1 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -36,6 +36,7 @@
 
 #include "trace.h"
 #include "irq.h"
+#include "fpu.h"
 
 /* "Hv#1" signature */
 #define HYPERV_CPUID_SIGNATURE_EAX 0x31237648
@@ -1623,6 +1624,8 @@ static __always_inline unsigned long 
*sparse_set_to_vcpu_mask(
return vcpu_bitmap;
 }
 
+#define KVM_HV_HYPERCALL_MAX_XMM_REGISTERS  6
+
 struct kvm_hv_hcall {
u64 param;
u64 ingpa;
@@ -1632,10 +1635,14 @@ struct kvm_hv_hcall {
u16 rep_idx;
bool fast;
bool rep;
+   sse128_t xmm[KVM_HV_HYPERCALL_MAX_XMM_REGISTERS];
+   bool xmm_dirty;
 };
 
 static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, 
bool ex)
 {
+   int i, j;
+   gpa_t gpa;
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
struct hv_tlb_flush_ex flush_ex;
@@ -1649,8 +1656,15 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, 
struct kvm_hv_hcall *hc, bool
bool all_cpus;
 
if (!ex) {
-   if (unlikely(kvm_read_guest(kvm, hc->ingpa, , 
sizeof(flush
-   return HV_STATUS_INVALID_HYPERCALL_INPUT;
+   if (hc->fast) {
+   flush.address_space = hc->ingpa;
+   flush.flags = hc->outgpa;
+   flush.processor_mask = sse128_lo(hc->xmm[0]);
+   } else {
+   if (unlikely(kvm_read_guest(kvm, hc->ingpa,
+   , sizeof(flush
+   return HV_STATUS_INVALID_HYPERCALL_INPUT;
+   }
 
trace_kvm_hv_flush_tlb(flush.processor_mask,
   flush.address_space, flush.flags);
@@ -1668,9 +1682,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, 
struct kvm_hv_hcall *hc, bool
all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
flush.processor_mask == 0;
} else {
-   if (unlikely(kvm_read_guest(kvm, hc->ingpa, _ex,
-   sizeof(flush_ex
-   return HV_STATUS_INVALID_HYPERCALL_INPUT;
+   if (hc->fast) {
+   flush_ex.address_space = hc->ingpa;
+   flush_ex.flags = hc->outgpa;
+   memcpy(_ex.hv_vp_set,
+  >xmm[0], sizeof(hc->xmm[0]));
+   } else {
+   if (unlikely(kvm_read_guest(kvm, hc->ingpa, _ex,
+   sizeof(flush_ex
+   return HV_STATUS_INVALID_HYPERCALL_INPUT;
+   }
 
trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
  flush_ex.hv_vp_set.format,
@@ -1681,20 +1702,29 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, 
struct kvm_hv_hcall *hc, bool
all_cpus = flush_ex.hv_vp_set.format !=
HV_GENERIC_SET_SPARSE_4K;
 
-   sparse_banks_len =
-   bitmap_weight((unsigned long *)_bank_mask, 64) *
-   sizeof(sparse_banks[0]);
+   sparse_banks_len = bitmap_weight((unsigned long 
*)_bank_mask, 64);
 
if (!sparse_banks_len && !all_cpus)
goto ret_success;
 
-   if (!all_cpus &&
-   kvm_read_guest(kvm,
-  hc->ingpa + offsetof(struct hv_tlb_flush_ex,
-   
hv_vp_set.bank_contents),
-  sparse_banks,
-  sparse_banks_len))
-   return

[PATCH 2/4] KVM: hyper-v: Collect hypercall params into struct

2021-04-07 Thread Siddharth Chandrasekaran
As of now there are 7 parameters (and flags) that are used in various
hyper-v hypercall handlers. There are 6 more input/output parameters
passed from XMM registers which are to be added in an upcoming patch.

To make passing arguments to the handlers more readable, capture all
these parameters into a single structure.

Cc: Alexander Graf 
Cc: Evgeny Iakovlev 
Signed-off-by: Siddharth Chandrasekaran 
---
 arch/x86/kvm/hyperv.c | 147 +++---
 1 file changed, 79 insertions(+), 68 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index f98370a39936..8f6babd1ea0d 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1623,7 +1623,18 @@ static __always_inline unsigned long 
*sparse_set_to_vcpu_mask(
return vcpu_bitmap;
 }
 
-static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 ingpa, u16 rep_cnt, 
bool ex)
+struct kvm_hv_hcall {
+   u64 param;
+   u64 ingpa;
+   u64 outgpa;
+   u16 code;
+   u16 rep_cnt;
+   u16 rep_idx;
+   bool fast;
+   bool rep;
+};
+
+static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, 
bool ex)
 {
struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
@@ -1638,7 +1649,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 
ingpa, u16 rep_cnt, bool
bool all_cpus;
 
if (!ex) {
-   if (unlikely(kvm_read_guest(kvm, ingpa, , sizeof(flush
+   if (unlikely(kvm_read_guest(kvm, hc->ingpa, , 
sizeof(flush
return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
trace_kvm_hv_flush_tlb(flush.processor_mask,
@@ -1657,7 +1668,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 
ingpa, u16 rep_cnt, bool
all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
flush.processor_mask == 0;
} else {
-   if (unlikely(kvm_read_guest(kvm, ingpa, _ex,
+   if (unlikely(kvm_read_guest(kvm, hc->ingpa, _ex,
sizeof(flush_ex
return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
@@ -1679,8 +1690,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 
ingpa, u16 rep_cnt, bool
 
if (!all_cpus &&
kvm_read_guest(kvm,
-  ingpa + offsetof(struct hv_tlb_flush_ex,
-   hv_vp_set.bank_contents),
+  hc->ingpa + offsetof(struct hv_tlb_flush_ex,
+   
hv_vp_set.bank_contents),
   sparse_banks,
   sparse_banks_len))
return HV_STATUS_INVALID_HYPERCALL_INPUT;
@@ -1700,9 +1711,9 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 
ingpa, u16 rep_cnt, bool
NULL, vcpu_mask, _vcpu->tlb_flush);
 
 ret_success:
-   /* We always do full TLB flush, set rep_done = rep_cnt. */
+   /* We always do full TLB flush, set rep_done = hc->rep_cnt. */
return (u64)HV_STATUS_SUCCESS |
-   ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
+   ((u64)hc->rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
 }
 
 static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
@@ -1724,8 +1735,7 @@ static void kvm_send_ipi_to_many(struct kvm *kvm, u32 
vector,
}
 }
 
-static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, u64 ingpa, u64 outgpa,
-  bool ex, bool fast)
+static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, 
bool ex)
 {
struct kvm *kvm = vcpu->kvm;
struct hv_send_ipi_ex send_ipi_ex;
@@ -1740,25 +1750,25 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, u64 
ingpa, u64 outgpa,
bool all_cpus;
 
if (!ex) {
-   if (!fast) {
-   if (unlikely(kvm_read_guest(kvm, ingpa, _ipi,
+   if (!hc->fast) {
+   if (unlikely(kvm_read_guest(kvm, hc->ingpa, _ipi,
sizeof(send_ipi
return HV_STATUS_INVALID_HYPERCALL_INPUT;
sparse_banks[0] = send_ipi.cpu_mask;
vector = send_ipi.vector;
} else {
/* 'reserved' part of hv_send_ipi should be 0 */
-   if (unlikely(ingpa >> 32 != 0))
+   if (unlikely(hc->ingpa >> 32 != 0))
return HV_STATUS_INVALID_HYPERCALL_INPUT;
-   sparse_banks[0] = outgpa;
-   vector = (u32)ingpa;
+   sparse_banks[0] = hc->outgpa;
+   vector = (u32)hc->ingpa;
}

[PATCH 1/4] KVM: x86: Move FPU register accessors into fpu.h

2021-04-07 Thread Siddharth Chandrasekaran
Hyper-v XMM fast hypercalls use XMM registers to pass input/output
parameters. To access these, hyperv.c can reuse some FPU register
accessors defined in emulator.c. Move them to a common location so both
can access them.

While at it, reorder the parameters of these accessor methods to make
them more readable.

Cc: Alexander Graf 
Cc: Evgeny Iakovlev 
Signed-off-by: Siddharth Chandrasekaran 
---
 arch/x86/kvm/emulate.c | 138 ++--
 arch/x86/kvm/fpu.h | 140 +
 2 files changed, 158 insertions(+), 120 deletions(-)
 create mode 100644 arch/x86/kvm/fpu.h

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index f7970ba6219f..296f8f3ce988 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -22,7 +22,6 @@
 #include "kvm_cache_regs.h"
 #include "kvm_emulate.h"
 #include 
-#include 
 #include 
 #include 
 
@@ -30,6 +29,7 @@
 #include "tss.h"
 #include "mmu.h"
 #include "pmu.h"
+#include "fpu.h"
 
 /*
  * Operand types
@@ -1081,116 +1081,14 @@ static void fetch_register_operand(struct operand *op)
}
 }
 
-static void emulator_get_fpu(void)
-{
-   fpregs_lock();
-
-   fpregs_assert_state_consistent();
-   if (test_thread_flag(TIF_NEED_FPU_LOAD))
-   switch_fpu_return();
-}
-
-static void emulator_put_fpu(void)
-{
-   fpregs_unlock();
-}
-
-static void read_sse_reg(sse128_t *data, int reg)
-{
-   emulator_get_fpu();
-   switch (reg) {
-   case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
-   case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
-   case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
-   case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
-   case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
-   case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
-   case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
-   case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
-#ifdef CONFIG_X86_64
-   case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
-   case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
-   case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
-   case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
-   case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
-   case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
-   case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
-   case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
-#endif
-   default: BUG();
-   }
-   emulator_put_fpu();
-}
-
-static void write_sse_reg(sse128_t *data, int reg)
-{
-   emulator_get_fpu();
-   switch (reg) {
-   case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
-   case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
-   case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
-   case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
-   case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
-   case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
-   case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
-   case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
-#ifdef CONFIG_X86_64
-   case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
-   case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
-   case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
-   case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
-   case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
-   case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
-   case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
-   case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
-#endif
-   default: BUG();
-   }
-   emulator_put_fpu();
-}
-
-static void read_mmx_reg(u64 *data, int reg)
-{
-   emulator_get_fpu();
-   switch (reg) {
-   case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
-   case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
-   case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
-   case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
-   case 4: asm(&q

[PATCH] KVM: make: Fix out-of-source module builds

2021-03-24 Thread Siddharth Chandrasekaran
Building kvm module out-of-source with,

make -C $SRC O=$BIN M=arch/x86/kvm

fails to find "irq.h" as the include dir passed to cflags-y does not
prefix the source dir. Fix this by prefixing $(srctree) to the include
dir path.

Signed-off-by: Siddharth Chandrasekaran 
---
 arch/x86/kvm/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 1b4766fe1de2..eafc4d601f25 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
-ccflags-y += -Iarch/x86/kvm
+ccflags-y += -I $(srctree)/arch/x86/kvm
 ccflags-$(CONFIG_KVM_WERROR) += -Werror
 
 ifeq ($(CONFIG_FRAME_POINTER),y)
-- 
2.17.1




Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





Re: [PATCH] Backport security fixe to 4.9 and 4.4 stable trees

2020-05-15 Thread Siddharth Chandrasekaran
On Fri, May 15, 2020 at 02:57:01PM +0200, Greg KH wrote:
> On Fri, May 15, 2020 at 06:19:45PM +0530, Siddharth Chandrasekaran wrote:
> > Please ignore this patch set, I accidentally added another patch I was
> > working on. Will send v2 with the right patches.
> 
> What patch set?  I see nothing in this email, so I have no idea what you
> are referring to :(

Apologies! Looks like my email thread was broken. I was referring to
the thread here: https://lkml.org/lkml/2020/5/14/1326 with subject:
  
  "[PATCH] Backport security fixe to 4.9 and 4.4 stable trees"

The corrected version (v2) of this patch should have reached you
(hopefully) with the subject:

  "[PATCH v2] Backport xfs security fix to 4.9 and 4.4 stable trees"

Thanks!

-- Sid.


[PATCH v2] Backport xfs security fix to 4.9 and 4.4 stable trees

2020-05-15 Thread Siddharth Chandrasekaran
Hello,

Lack of proper validation that cached inodes are free during allocation can
cause a crash in fs/xfs/xfs_icache.c (refer: CVE-2018-13093). To address this
issue, I'm backporting upstream commit [1] to 4.4 and 4.9 stable trees
(a backport of [1] to 4.14 already exists).

Also, commit [1] references another commit [2] which added checks only to
xfs_iget_cache_miss(). In this patch, those checks have been moved into a
dedicated checker method and both xfs_iget_cache_miss() and
xfs_iget_cache_hit() are made to call that method. This code reorg in commit
[1], makes commit [2] redundant in the history of the 4.9 and 4.4 stable
trees. So commit [2] is not being backported.

-- Sid

[1]: afca6c5b2595 ("xfs: validate cached inodes are free when allocated")
[2]: ee457001ed6c ("xfs: catch inode allocation state mismatch corruption")

change log:
v2:
 - Reword cover letter.
 - Fix accidental wrong patch that got mailed.

-- 
2.7.4



[PATCH 4.9 v2] xfs: validate cached inodes are free when allocated

2020-05-15 Thread Siddharth Chandrasekaran
From: Dave Chinner 

commit afca6c5b2595fc44383919fba740c194b0b76aff upstream.

A recent fuzzed filesystem image cached random dcache corruption
when the reproducer was run. This often showed up as panics in
lookup_slow() on a null inode->i_ops pointer when doing pathwalks.

BUG: unable to handle kernel NULL pointer dereference at 

Call Trace:
 lookup_slow+0x44/0x60
 walk_component+0x3dd/0x9f0
 link_path_walk+0x4a7/0x830
 path_lookupat+0xc1/0x470
 filename_lookup+0x129/0x270
 user_path_at_empty+0x36/0x40
 path_listxattr+0x98/0x110
 SyS_listxattr+0x13/0x20
 do_syscall_64+0xf5/0x280
 entry_SYSCALL_64_after_hwframe+0x42/0xb7

but had many different failure modes including deadlocks trying to
lock the inode that was just allocated or KASAN reports of
use-after-free violations.

The cause of the problem was a corrupt INOBT on a v4 fs where the
root inode was marked as free in the inobt record. Hence when we
allocated an inode, it chose the root inode to allocate, found it in
the cache and re-initialised it.

We recently fixed a similar inode allocation issue caused by inobt
record corruption problem in xfs_iget_cache_miss() in commit
ee457001ed6c ("xfs: catch inode allocation state mismatch
corruption"). This change adds similar checks to the cache-hit path
to catch it, and turns the reproducer into a corruption shutdown
situation.

Reported-by: Wen Xu 
Signed-Off-By: Dave Chinner 
Reviewed-by: Christoph Hellwig 
Reviewed-by: Carlos Maiolino 
Reviewed-by: Darrick J. Wong 
[darrick: fix typos in comment]
Signed-off-by: Darrick J. Wong 
Signed-off-by: Siddharth Chandrasekaran 
---
 fs/xfs/xfs_icache.c | 57 ++---
 1 file changed, 50 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 86a4911..d668d30 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -308,6 +308,46 @@ xfs_reinit_inode(
 }
 
 /*
+ * If we are allocating a new inode, then check what was returned is
+ * actually a free, empty inode. If we are not allocating an inode,
+ * then check we didn't find a free inode.
+ *
+ * Returns:
+ * 0   if the inode free state matches the lookup context
+ * -ENOENT if the inode is free and we are not allocating
+ * -EFSCORRUPTED   if there is any state mismatch at all
+ */
+static int
+xfs_iget_check_free_state(
+   struct xfs_inode*ip,
+   int flags)
+{
+   if (flags & XFS_IGET_CREATE) {
+   /* should be a free inode */
+   if (VFS_I(ip)->i_mode != 0) {
+   xfs_warn(ip->i_mount,
+"Corruption detected! Free inode 0x%llx not marked free! (mode 0x%x)",
+   ip->i_ino, VFS_I(ip)->i_mode);
+   return -EFSCORRUPTED;
+   }
+
+   if (ip->i_d.di_nblocks != 0) {
+   xfs_warn(ip->i_mount,
+"Corruption detected! Free inode 0x%llx has blocks allocated!",
+   ip->i_ino);
+   return -EFSCORRUPTED;
+   }
+   return 0;
+   }
+
+   /* should be an allocated inode */
+   if (VFS_I(ip)->i_mode == 0)
+   return -ENOENT;
+
+   return 0;
+}
+
+/*
  * Check the validity of the inode we just found it the cache
  */
 static int
@@ -356,12 +396,12 @@ xfs_iget_cache_hit(
}
 
/*
-* If lookup is racing with unlink return an error immediately.
+* Check the inode free state is valid. This also detects lookup
+* racing with unlinks.
 */
-   if (VFS_I(ip)->i_mode == 0 && !(flags & XFS_IGET_CREATE)) {
-   error = -ENOENT;
+   error = xfs_iget_check_free_state(ip, flags);
+   if (error)
goto out_error;
-   }
 
/*
 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
@@ -471,10 +511,13 @@ xfs_iget_cache_miss(
 
trace_xfs_iget_miss(ip);
 
-   if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) {
-   error = -ENOENT;
+   /*
+* Check the inode free state is valid. This also detects lookup
+* racing with unlinks.
+*/
+   error = xfs_iget_check_free_state(ip, flags);
+   if (error)
goto out_destroy;
-   }
 
/*
 * Preload the radix tree so we can insert safely under the
-- 
2.7.4



[PATCH 4.4 v2] xfs: validate cached inodes are free when allocated

2020-05-15 Thread Siddharth Chandrasekaran
From: Dave Chinner 

commit afca6c5b2595fc44383919fba740c194b0b76aff upstream.

A recent fuzzed filesystem image cached random dcache corruption
when the reproducer was run. This often showed up as panics in
lookup_slow() on a null inode->i_ops pointer when doing pathwalks.

BUG: unable to handle kernel NULL pointer dereference at 

Call Trace:
 lookup_slow+0x44/0x60
 walk_component+0x3dd/0x9f0
 link_path_walk+0x4a7/0x830
 path_lookupat+0xc1/0x470
 filename_lookup+0x129/0x270
 user_path_at_empty+0x36/0x40
 path_listxattr+0x98/0x110
 SyS_listxattr+0x13/0x20
 do_syscall_64+0xf5/0x280
 entry_SYSCALL_64_after_hwframe+0x42/0xb7

but had many different failure modes including deadlocks trying to
lock the inode that was just allocated or KASAN reports of
use-after-free violations.

The cause of the problem was a corrupt INOBT on a v4 fs where the
root inode was marked as free in the inobt record. Hence when we
allocated an inode, it chose the root inode to allocate, found it in
the cache and re-initialised it.

We recently fixed a similar inode allocation issue caused by inobt
record corruption problem in xfs_iget_cache_miss() in commit
ee457001ed6c ("xfs: catch inode allocation state mismatch
corruption"). This change adds similar checks to the cache-hit path
to catch it, and turns the reproducer into a corruption shutdown
situation.

Reported-by: Wen Xu 
Signed-Off-By: Dave Chinner 
Reviewed-by: Christoph Hellwig 
Reviewed-by: Carlos Maiolino 
Reviewed-by: Darrick J. Wong 
[darrick: fix typos in comment]
Signed-off-by: Darrick J. Wong 
Signed-off-by: Siddharth Chandrasekaran 
---
 fs/xfs/xfs_icache.c | 57 ++---
 1 file changed, 50 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index adbc1f5..7efeefb 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -135,6 +135,46 @@ xfs_inode_free(
 }
 
 /*
+ * If we are allocating a new inode, then check what was returned is
+ * actually a free, empty inode. If we are not allocating an inode,
+ * then check we didn't find a free inode.
+ *
+ * Returns:
+ * 0   if the inode free state matches the lookup context
+ * -ENOENT if the inode is free and we are not allocating
+ * -EFSCORRUPTED   if there is any state mismatch at all
+ */
+static int
+xfs_iget_check_free_state(
+   struct xfs_inode*ip,
+   int flags)
+{
+   if (flags & XFS_IGET_CREATE) {
+   /* should be a free inode */
+   if (VFS_I(ip)->i_mode != 0) {
+   xfs_warn(ip->i_mount,
+"Corruption detected! Free inode 0x%llx not marked free! (mode 0x%x)",
+   ip->i_ino, VFS_I(ip)->i_mode);
+   return -EFSCORRUPTED;
+   }
+
+   if (ip->i_d.di_nblocks != 0) {
+   xfs_warn(ip->i_mount,
+"Corruption detected! Free inode 0x%llx has blocks allocated!",
+   ip->i_ino);
+   return -EFSCORRUPTED;
+   }
+   return 0;
+   }
+
+   /* should be an allocated inode */
+   if (VFS_I(ip)->i_mode == 0)
+   return -ENOENT;
+
+   return 0;
+}
+
+/*
  * Check the validity of the inode we just found it the cache
  */
 static int
@@ -183,12 +223,12 @@ xfs_iget_cache_hit(
}
 
/*
-* If lookup is racing with unlink return an error immediately.
+* Check the inode free state is valid. This also detects lookup
+* racing with unlinks.
 */
-   if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
-   error = -ENOENT;
+   error = xfs_iget_check_free_state(ip, flags);
+   if (error)
goto out_error;
-   }
 
/*
 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
@@ -298,10 +338,13 @@ xfs_iget_cache_miss(
 
trace_xfs_iget_miss(ip);
 
-   if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
-   error = -ENOENT;
+   /*
+* Check the inode free state is valid. This also detects lookup
+* racing with unlinks.
+*/
+   error = xfs_iget_check_free_state(ip, flags);
+   if (error)
goto out_destroy;
-   }
 
/*
 * Preload the radix tree so we can insert safely under the
-- 
2.7.4



Re: [PATCH] Backport security fixe to 4.9 and 4.4 stable trees

2020-05-15 Thread Siddharth Chandrasekaran
Please ignore this patch set, I accidentally added another patch I was
working on. Will send v2 with the right patches.

Thanks!

-- Sid.


[PATCH v4.4] xfs: validate cached inodes are free when allocated

2020-05-14 Thread Siddharth Chandrasekaran
From: Dave Chinner 

commit afca6c5b2595fc44383919fba740c194b0b76aff upstream.

A recent fuzzed filesystem image cached random dcache corruption
when the reproducer was run. This often showed up as panics in
lookup_slow() on a null inode->i_ops pointer when doing pathwalks.

BUG: unable to handle kernel NULL pointer dereference at 

Call Trace:
 lookup_slow+0x44/0x60
 walk_component+0x3dd/0x9f0
 link_path_walk+0x4a7/0x830
 path_lookupat+0xc1/0x470
 filename_lookup+0x129/0x270
 user_path_at_empty+0x36/0x40
 path_listxattr+0x98/0x110
 SyS_listxattr+0x13/0x20
 do_syscall_64+0xf5/0x280
 entry_SYSCALL_64_after_hwframe+0x42/0xb7

but had many different failure modes including deadlocks trying to
lock the inode that was just allocated or KASAN reports of
use-after-free violations.

The cause of the problem was a corrupt INOBT on a v4 fs where the
root inode was marked as free in the inobt record. Hence when we
allocated an inode, it chose the root inode to allocate, found it in
the cache and re-initialised it.

We recently fixed a similar inode allocation issue caused by inobt
record corruption problem in xfs_iget_cache_miss() in commit
ee457001ed6c ("xfs: catch inode allocation state mismatch
corruption"). This change adds similar checks to the cache-hit path
to catch it, and turns the reproducer into a corruption shutdown
situation.

Reported-by: Wen Xu 
Signed-Off-By: Dave Chinner 
Reviewed-by: Christoph Hellwig 
Reviewed-by: Carlos Maiolino 
Reviewed-by: Darrick J. Wong 
[darrick: fix typos in comment]
Signed-off-by: Darrick J. Wong 
Signed-off-by: Siddharth Chandrasekaran 
---
 fs/xfs/xfs_icache.c | 57 ++---
 1 file changed, 50 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index adbc1f5..7efeefb 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -135,6 +135,46 @@ xfs_inode_free(
 }
 
 /*
+ * If we are allocating a new inode, then check what was returned is
+ * actually a free, empty inode. If we are not allocating an inode,
+ * then check we didn't find a free inode.
+ *
+ * Returns:
+ * 0   if the inode free state matches the lookup context
+ * -ENOENT if the inode is free and we are not allocating
+ * -EFSCORRUPTED   if there is any state mismatch at all
+ */
+static int
+xfs_iget_check_free_state(
+   struct xfs_inode*ip,
+   int flags)
+{
+   if (flags & XFS_IGET_CREATE) {
+   /* should be a free inode */
+   if (VFS_I(ip)->i_mode != 0) {
+   xfs_warn(ip->i_mount,
+"Corruption detected! Free inode 0x%llx not marked free! (mode 0x%x)",
+   ip->i_ino, VFS_I(ip)->i_mode);
+   return -EFSCORRUPTED;
+   }
+
+   if (ip->i_d.di_nblocks != 0) {
+   xfs_warn(ip->i_mount,
+"Corruption detected! Free inode 0x%llx has blocks allocated!",
+   ip->i_ino);
+   return -EFSCORRUPTED;
+   }
+   return 0;
+   }
+
+   /* should be an allocated inode */
+   if (VFS_I(ip)->i_mode == 0)
+   return -ENOENT;
+
+   return 0;
+}
+
+/*
  * Check the validity of the inode we just found it the cache
  */
 static int
@@ -183,12 +223,12 @@ xfs_iget_cache_hit(
}
 
/*
-* If lookup is racing with unlink return an error immediately.
+* Check the inode free state is valid. This also detects lookup
+* racing with unlinks.
 */
-   if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
-   error = -ENOENT;
+   error = xfs_iget_check_free_state(ip, flags);
+   if (error)
goto out_error;
-   }
 
/*
 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
@@ -298,10 +338,13 @@ xfs_iget_cache_miss(
 
trace_xfs_iget_miss(ip);
 
-   if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
-   error = -ENOENT;
+   /*
+* Check the inode free state is valid. This also detects lookup
+* racing with unlinks.
+*/
+   error = xfs_iget_check_free_state(ip, flags);
+   if (error)
goto out_destroy;
-   }
 
/*
 * Preload the radix tree so we can insert safely under the
-- 
2.7.4



[PATCH v4.9] xfs: More robust inode extent count validation

2020-05-14 Thread Siddharth Chandrasekaran
From: Dave Chinner 

commit 23fcb3340d033d9f081e21e6c12c2db7eaa541d3 upstream.

When the inode is in extent format, it can't have more extents than
fit in the inode fork. We don't currently check this, and so this
corruption goes unnoticed by the inode verifiers. This can lead to
crashes operating on invalid in-memory structures.

Attempts to access such an inode will now error out in the verifier
rather than allowing modification operations to proceed.

Reported-by: Wen Xu 
Signed-off-by: Dave Chinner 
Reviewed-by: Darrick J. Wong 
[darrick: fix a typedef, add some braces and breaks to shut up compiler 
warnings]
Signed-off-by: Darrick J. Wong 
Signed-off-by: Siddharth Chandrasekaran 
---
 fs/xfs/libxfs/xfs_format.h|   3 ++
 fs/xfs/libxfs/xfs_inode_buf.c | 112 --
 2 files changed, 112 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 6b7579e..9967d30 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -979,6 +979,9 @@ typedef enum xfs_dinode_fmt {
XFS_DFORK_DSIZE(dip, mp) : \
XFS_DFORK_ASIZE(dip, mp))
 
+#define XFS_DFORK_MAXEXT(dip, mp, w) \
+   (XFS_DFORK_SIZE(dip, mp, w) / sizeof(struct xfs_bmbt_rec))
+
 /*
  * Return pointers to the data or attribute forks.
  */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 37ee7f0..ee035a4 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -381,7 +381,48 @@ xfs_log_dinode_to_disk(
}
 }
 
-static bool
+static xfs_failaddr_t
+xfs_dinode_verify_fork(
+   struct xfs_dinode   *dip,
+   struct xfs_mount*mp,
+   int whichfork)
+{
+   uint32_tdi_nextents = XFS_DFORK_NEXTENTS(dip, 
whichfork);
+
+   switch (XFS_DFORK_FORMAT(dip, whichfork)) {
+   case XFS_DINODE_FMT_LOCAL:
+   /*
+* no local regular files yet
+*/
+   if (whichfork == XFS_DATA_FORK) {
+   if (S_ISREG(be16_to_cpu(dip->di_mode)))
+   return __this_address;
+   if (be64_to_cpu(dip->di_size) >
+   XFS_DFORK_SIZE(dip, mp, whichfork))
+   return __this_address;
+   }
+   if (di_nextents)
+   return __this_address;
+   break;
+   case XFS_DINODE_FMT_EXTENTS:
+   if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
+   return __this_address;
+   break;
+   case XFS_DINODE_FMT_BTREE:
+   if (whichfork == XFS_ATTR_FORK) {
+   if (di_nextents > MAXAEXTNUM)
+   return __this_address;
+   } else if (di_nextents > MAXEXTNUM) {
+   return __this_address;
+   }
+   break;
+   default:
+   return __this_address;
+   }
+   return NULL;
+}
+
+xfs_failaddr_t
 xfs_dinode_verify(
struct xfs_mount*mp,
struct xfs_inode*ip,
@@ -403,8 +444,73 @@ xfs_dinode_verify(
return false;
 
/* No zero-length symlinks/dirs. */
-   if ((S_ISLNK(mode) || S_ISDIR(mode)) && dip->di_size == 0)
-   return false;
+   if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
+   return __this_address;
+
+   /* Fork checks carried over from xfs_iformat_fork */
+   if (mode &&
+   be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) >
+   be64_to_cpu(dip->di_nblocks))
+   return __this_address;
+
+   if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
+   return __this_address;
+
+   flags = be16_to_cpu(dip->di_flags);
+
+   if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
+   return __this_address;
+
+   /* Do we have appropriate data fork formats for the mode? */
+   switch (mode & S_IFMT) {
+   case S_IFIFO:
+   case S_IFCHR:
+   case S_IFBLK:
+   case S_IFSOCK:
+   if (dip->di_format != XFS_DINODE_FMT_DEV)
+   return __this_address;
+   break;
+   case S_IFREG:
+   case S_IFLNK:
+   case S_IFDIR:
+   fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
+   if (fa)
+   return fa;
+   break;
+   case 0:
+   /* Uninitialized inode ok. */
+   break;
+   default:
+   return __this_address;
+   }
+
+   if (XFS_DFORK_Q(dip)) {
+   fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
+   if (fa)
+ 

[PATCH] Backport security fix to 4.9 and 4.4 stable trees

2020-05-14 Thread Siddharth Chandrasekaran
The lack of proper validation that cached inodes are free during allocation
causes a crash (refer to CVE-2018-13093 for more details). To address this
issue, I'm backporting upstream commit [1] to the 4.4 and 4.9 stable trees
(a backport of [1] to 4.14 already exists).

Also, commit [1] references another commit [2] which added checks only to
xfs_iget_cache_miss(). In this patch, those checks have been moved into a
dedicated checker method and both xfs_iget_cache_miss() and
xfs_iget_cache_hit() are made to call that method. This code reorg in commit
[1], makes commit [2] redundant in the history of the 4.9 and 4.4 stable
trees. So commit [2] is not being backported.

-- Sid

[1]: afca6c5b2595f ("xfs: validate cached inodes are free when allocated")
[2]: ee457001ed6c ("xfs: catch inode allocation state mismatch corruption")

[v4.9]
Dave Chinner (1):
  xfs: More robust inode extent count validation

 fs/xfs/libxfs/xfs_format.h|   3 ++
 fs/xfs/libxfs/xfs_inode_buf.c | 112 --
 2 files changed, 112 insertions(+), 3 deletions(-)

[v4.4]
Dave Chinner (1):
  xfs: validate cached inodes are free when allocated

 fs/xfs/xfs_icache.c | 57 ++---
 1 file changed, 50 insertions(+), 7 deletions(-)

-- 
2.7.4