When a guest counter is configured as a PEBS counter through
IA32_PEBS_ENABLE, the corresponding guest event is reprogrammed
as a PEBS event by setting a non-zero precision level
(attr.precise_ip) in its perf_event_attr.

When the PEBS facility generates a PEBS overflow PMI based on
the guest IA32_DS_AREA MSR, the guest PEBS overflow PMI bit is
set in the guest GLOBAL_STATUS MSR.

Later, once the new PEBS-PDIR feature is supported,
attr.precise_ip will be adjusted to a special precision level,
which will affect the scheduling of host counters.

A guest PEBS event is not reused for a non-PEBS guest event,
even when both use the same guest counter index.

Originally-by: Andi Kleen <[email protected]>
Co-developed-by: Kan Liang <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Like Xu <[email protected]>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/pmu.c              | 29 +++++++++++++++++++++++++++--
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 94c8bfee4a82..09dacda33fb8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -449,6 +449,8 @@ struct kvm_pmu {
        DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
        DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
 
+       u64 pebs_enable;
+
        /*
         * The gate to release perf_events not marked in
         * pmc_in_use only once in a vcpu time slice.
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 67741d2a0308..2e81c50323e2 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -76,7 +76,12 @@ static void kvm_perf_overflow_intr(struct perf_event 
*perf_event,
        struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 
        if (!test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) {
-               __set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
+               if (perf_event->attr.precise_ip) {
+                       /* Indicate PEBS overflow PMI to guest. */
+                       __set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT,
+                               (unsigned long *)&pmu->global_status);
+               } else
+                       __set_bit(pmc->idx, (unsigned long 
*)&pmu->global_status);
                kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
 
                /*
@@ -99,6 +104,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 
type,
                                  bool exclude_kernel, bool intr,
                                  bool in_tx, bool in_tx_cp)
 {
+       struct kvm_pmu *pmu = vcpu_to_pmu(pmc->vcpu);
        struct perf_event *event;
        struct perf_event_attr attr = {
                .type = type,
@@ -110,6 +116,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 
type,
                .exclude_kernel = exclude_kernel,
                .config = config,
        };
+       bool pebs = test_bit(pmc->idx, (unsigned long *)&pmu->pebs_enable);
 
        attr.sample_period = get_sample_period(pmc, pmc->counter);
 
@@ -124,9 +131,23 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 
type,
                attr.sample_period = 0;
                attr.config |= HSW_IN_TX_CHECKPOINTED;
        }
+       if (pebs) {
+               /*
+                * The non-zero precision level turns an ordinary guest event
+                * into a guest PEBS event and triggers the host
+                * PEBS PMI handler to determine whether the PEBS overflow PMI
+                * comes from the host counters or the guest.
+                *
+                * For most PEBS hardware events, the difference in the software
+                * precision levels of guest and host PEBS events will not 
affect
+                * the accuracy of the PEBS profiling result, because the 
"event IP"
+                * in the PEBS record is calibrated on the guest side.
+                */
+               attr.precise_ip = 1;
+       }
 
        event = perf_event_create_kernel_counter(&attr, -1, current,
-                                                intr ? kvm_perf_overflow_intr :
+                                                (intr || pebs) ? 
kvm_perf_overflow_intr :
                                                 kvm_perf_overflow, pmc);
        if (IS_ERR(event)) {
                pr_debug_ratelimited("kvm_pmu: event creation failed %ld for 
pmc->idx = %d\n",
@@ -161,6 +182,10 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
                              get_sample_period(pmc, pmc->counter)))
                return false;
 
+       if (!test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) 
&&
+           pmc->perf_event->attr.precise_ip)
+               return false;
+
        /* reuse perf_event to serve as pmc_reprogram_counter() does*/
        perf_event_enable(pmc->perf_event);
 
-- 
2.29.2

Reply via email to