On 27/09/2018 13:07, Roman Kagan wrote:
> On Wed, Sep 26, 2018 at 07:02:59PM +0200, Vitaly Kuznetsov wrote:
>> Using a hypercall for sending IPIs is faster because it allows specifying
>> any number of vCPUs (even > 64 with a sparse CPU set), and the whole
>> procedure takes only one VMEXIT.
>>
>> Current Hyper-V TLFS (v5.0b) claims that the HvCallSendSyntheticClusterIpi
>> hypercall can't be 'fast' (passing parameters through registers), but
>> apparently this is not true: Windows always uses it as 'fast', so we need
>> to support that.
>>
>> Signed-off-by: Vitaly Kuznetsov <vkuzn...@redhat.com>
>> ---
>>  Documentation/virtual/kvm/api.txt |   7 ++
>>  arch/x86/kvm/hyperv.c             | 115 ++++++++++++++++++++++++++++++
>>  arch/x86/kvm/trace.h              |  42 +++++++++++
>>  arch/x86/kvm/x86.c                |   1 +
>>  include/uapi/linux/kvm.h          |   1 +
>>  5 files changed, 166 insertions(+)
>>
>> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
>> index 647f94128a85..1659b75d577d 100644
>> --- a/Documentation/virtual/kvm/api.txt
>> +++ b/Documentation/virtual/kvm/api.txt
>> @@ -4772,3 +4772,10 @@ CPU when the exception is taken. If this virtual SError is taken to EL1 using
>>  AArch64, this value will be reported in the ISS field of ESR_ELx.
>>  
>>  See KVM_CAP_VCPU_EVENTS for more details.
>> +8.20 KVM_CAP_HYPERV_SEND_IPI
>> +
>> +Architectures: x86
>> +
>> +This capability indicates that KVM supports paravirtualized Hyper-V IPI send
>> +hypercalls:
>> +HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
>> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
>> index cc0535a078f7..4b4a6d015ade 100644
>> --- a/arch/x86/kvm/hyperv.c
>> +++ b/arch/x86/kvm/hyperv.c
>> @@ -1405,6 +1405,107 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
>>              ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
>>  }
>>  
>> +static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 
>> outgpa,
>> +                       bool ex, bool fast)
>> +{
>> +    struct kvm *kvm = current_vcpu->kvm;
>> +    struct kvm_hv *hv = &kvm->arch.hyperv;
>> +    struct hv_send_ipi_ex send_ipi_ex;
>> +    struct hv_send_ipi send_ipi;
>> +    struct kvm_vcpu *vcpu;
>> +    unsigned long valid_bank_mask;
>> +    u64 sparse_banks[64];
>> +    int sparse_banks_len, bank, i, sbank;
>> +    struct kvm_lapic_irq irq = {.delivery_mode = APIC_DM_FIXED};
>> +    bool all_cpus;
>> +
>> +    if (!ex) {
>> +            if (!fast) {
>> +                    if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi,
>> +                                                sizeof(send_ipi))))
>> +                            return HV_STATUS_INVALID_HYPERCALL_INPUT;
>> +                    sparse_banks[0] = send_ipi.cpu_mask;
>> +                    irq.vector = send_ipi.vector;
>> +            } else {
>> +                    /* 'reserved' part of hv_send_ipi should be 0 */
>> +                    if (unlikely(ingpa >> 32 != 0))
>> +                            return HV_STATUS_INVALID_HYPERCALL_INPUT;
>> +                    sparse_banks[0] = outgpa;
>> +                    irq.vector = (u32)ingpa;
>> +            }
>> +            all_cpus = false;
>> +            valid_bank_mask = BIT_ULL(0);
>> +
>> +            trace_kvm_hv_send_ipi(irq.vector, sparse_banks[0]);
>> +    } else {
>> +            if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex,
>> +                                        sizeof(send_ipi_ex))))
>> +                    return HV_STATUS_INVALID_HYPERCALL_INPUT;
>> +
>> +            trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
>> +                                     send_ipi_ex.vp_set.format,
>> +                                     send_ipi_ex.vp_set.valid_bank_mask);
>> +
>> +            irq.vector = send_ipi_ex.vector;
>> +            valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
>> +            sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
>> +                    sizeof(sparse_banks[0]);
>> +
>> +            all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;
>> +
>> +            if (!sparse_banks_len)
>> +                    goto ret_success;
>> +
>> +            if (!all_cpus &&
>> +                kvm_read_guest(kvm,
>> +                               ingpa + offsetof(struct hv_send_ipi_ex,
>> +                                                vp_set.bank_contents),
>> +                               sparse_banks,
>> +                               sparse_banks_len))
>> +                    return HV_STATUS_INVALID_HYPERCALL_INPUT;
>> +    }
>> +
>> +    if ((irq.vector < HV_IPI_LOW_VECTOR) ||
>> +        (irq.vector > HV_IPI_HIGH_VECTOR))
>> +            return HV_STATUS_INVALID_HYPERCALL_INPUT;
>> +
>> +    if (all_cpus || atomic_read(&hv->num_mismatched_vp_indexes)) {
>> +            kvm_for_each_vcpu(i, vcpu, kvm) {
>> +                    if (all_cpus || hv_vcpu_in_sparse_set(
>> +                                &vcpu->arch.hyperv, sparse_banks,
>> +                                valid_bank_mask)) {
>> +                            /* We fail only when APIC is disabled */
>> +                            kvm_apic_set_irq(vcpu, &irq, NULL);
>> +                    }
>> +            }
>> +            goto ret_success;
>> +    }
>> +
>> +    /*
>> +     * num_mismatched_vp_indexes is zero so every vcpu has
>> +     * vp_index == vcpu_idx.
>> +     */
>> +    sbank = 0;
>> +    for_each_set_bit(bank, (unsigned long *)&valid_bank_mask, 64) {
>> +            for_each_set_bit(i, (unsigned long *)&sparse_banks[sbank], 64) {
>> +                    u32 vp_index = bank * 64 + i;
>> +                    struct kvm_vcpu *vcpu =
>> +                            get_vcpu_by_vpidx(kvm, vp_index);
>> +
>> +                    /* Unknown vCPU specified */
>> +                    if (!vcpu)
>> +                            continue;
>> +
>> +                    /* We fail only when APIC is disabled */
>> +                    kvm_apic_set_irq(vcpu, &irq, NULL);
>> +            }
>> +            sbank++;
>> +    }
>> +
>> +ret_success:
>> +    return HV_STATUS_SUCCESS;
>> +}
>> +
> 
> I must say that now it looks even more tempting to follow the same
> pattern as your kvm_hv_flush_tlb: define a function that would call
> kvm_apic_set_irq() on all vcpus in a mask (optimizing the all-set case
> with a NULL mask), and make kvm_hv_send_ipi perform the same hv_vp_set
> -> vcpu_mask transformation followed by calling into that function.


It would perhaps be cleaner, but really kvm_apic_set_irq is as efficient
as it can be, since it takes the destination vcpu directly.

The code duplication for walking the sparse set is a bit ugly; perhaps
that could be changed to use an iterator macro.

Paolo

Reply via email to