2018-06-22 16:56+0200, Vitaly Kuznetsov:
> Using hypercall for sending IPIs is faster because this allows to specify
> any number of vCPUs (even > 64 with sparse CPU set), the whole procedure
> will take only one VMEXIT.
> 
> Current Hyper-V TLFS (v5.0b) claims that HvCallSendSyntheticClusterIpi
> hypercall can't be 'fast' (passing parameters through registers) but
> apparently this is not true, Windows always uses it as 'fast' so we need
> to support that.
> 
> Signed-off-by: Vitaly Kuznetsov <[email protected]>
> ---
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> @@ -1357,6 +1357,108 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu 
> *current_vcpu, u64 ingpa,
>               ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
>  }
>  
> +static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 
> outgpa,
> +                        bool ex, bool fast)
> +{
> +     struct kvm *kvm = current_vcpu->kvm;
> +     struct hv_send_ipi_ex send_ipi_ex;
> +     struct hv_send_ipi send_ipi;
> +     struct kvm_vcpu *vcpu;
> +     unsigned long valid_bank_mask = 0;
> +     u64 sparse_banks[64];
> +     int sparse_banks_len, i;
> +     struct kvm_lapic_irq irq = {0};
> +     bool all_cpus;
> +
> +     if (!ex) {
> +             if (!fast) {
> +                     if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi,
> +                                                 sizeof(send_ipi))))
> +                             return HV_STATUS_INVALID_HYPERCALL_INPUT;
> +                     sparse_banks[0] = send_ipi.cpu_mask;
> +                     irq.vector = send_ipi.vector;
> +             } else {
> +                     /* 'reserved' part of hv_send_ipi should be 0 */
> +                     if (unlikely(ingpa >> 32 != 0))
> +                             return HV_STATUS_INVALID_HYPERCALL_INPUT;
> +                     sparse_banks[0] = outgpa;
> +                     irq.vector = (u32)ingpa;
> +             }
> +             all_cpus = false;
> +
> +             trace_kvm_hv_send_ipi(irq.vector, sparse_banks[0]);
> +     } else {
> +             if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex,
> +                                         sizeof(send_ipi_ex))))
> +                     return HV_STATUS_INVALID_HYPERCALL_INPUT;
> +
> +             trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
> +                                      send_ipi_ex.vp_set.format,
> +                                      send_ipi_ex.vp_set.valid_bank_mask);
> +
> +             irq.vector = send_ipi_ex.vector;
> +             valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
> +             sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
> +                     sizeof(sparse_banks[0]);
> +             all_cpus = send_ipi_ex.vp_set.format !=
> +                     HV_GENERIC_SET_SPARSE_4K;

This would be much better readable as

  send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL

And if Microsoft ever adds more formats, they won't be all VCPUs, so
we're future-proofing as well.

> +
> +             if (!sparse_banks_len)
> +                     goto ret_success;
> +
> +             if (!all_cpus &&
> +                 kvm_read_guest(kvm,
> +                                ingpa + offsetof(struct hv_send_ipi_ex,
> +                                                 vp_set.bank_contents),
> +                                sparse_banks,
> +                                sparse_banks_len))
> +                     return HV_STATUS_INVALID_HYPERCALL_INPUT;
> +     }
> +
> +     if ((irq.vector < HV_IPI_LOW_VECTOR) ||
> +         (irq.vector > HV_IPI_HIGH_VECTOR))
> +             return HV_STATUS_INVALID_HYPERCALL_INPUT;
> +
> +     irq.delivery_mode = APIC_DM_FIXED;

I'd set this during variable definition.

APIC_DM_FIXED is 0 anyway and the compiler probably won't optimize it
here due to function with side effects since definition.

> +
> +     kvm_for_each_vcpu(i, vcpu, kvm) {
> +             struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
> +             int bank = hv->vp_index / 64, sbank = 0;
> +
> +             if (!all_cpus) {
> +                     /* Banks >64 can't be represented */
> +                     if (bank >= 64)
> +                             continue;
> +
> +                     /* Non-ex hypercalls can only address first 64 vCPUs */
> +                     if (!ex && bank)
> +                             continue;
> +
> +                     if (ex) {
> +                             /*
> +                              * Check is the bank of this vCPU is in sparse
> +                              * set and get the sparse bank number.
> +                              */
> +                             sbank = get_sparse_bank_no(valid_bank_mask,
> +                                                        bank);
> +
> +                             if (sbank < 0)
> +                                     continue;
> +                     }
> +
> +                     if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
> +                             continue;
> +             }
> +
> +             /* We fail only when APIC is disabled */
> +             if (!kvm_apic_set_irq(vcpu, &irq, NULL))
> +                     return HV_STATUS_INVALID_HYPERCALL_INPUT;

Does Windows use this even for 1 VCPU IPI?

I'm thinking we could apply the same optimization we do for LAPIC -- RCU
protected array that maps vp_index to vcpu.

Thanks.

> +     }
> +
> +ret_success:
> +     return HV_STATUS_SUCCESS;
> +}
> +
>  bool kvm_hv_hypercall_enabled(struct kvm *kvm)
>  {
>       return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & 
> HV_X64_MSR_HYPERCALL_ENABLE;
> @@ -1526,6 +1628,20 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>               }
>               ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
>               break;
> +     case HVCALL_SEND_IPI:
> +             if (unlikely(rep)) {
> +                     ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
> +                     break;
> +             }
> +             ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, false, fast);
> +             break;
> +     case HVCALL_SEND_IPI_EX:
> +             if (unlikely(fast || rep)) {

Now I'm getting worried that the ex can be fast as well and we'll be
reading the banks from XMM registers. :)

Reply via email to