Signed-off-by: Kan Liang <[email protected]>
---
This patch is kernel patch.
The user space patch can be found here.
https://urldefense.proofpoint.com/v2/url?u=https-3A__www.spinics.net_lists_kernel_msg2587093.html&d=DwIBaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=3f2W2m24mqGnx1C8qDsVjM_Sd89MwbaDB37IJVL-h7w&m=bL7qsJKf8aRpvBVr07ODZxoQhPgxffpGFgN9A4iyACw&s=R1OMYJt1szqNhPZu8QQDDRCnQgHZiQzcCPzofF6FIF8&e=
Changes since V7
- Fix virt_addr_valid compile warning for MIPS architecture (LKP)
arch/powerpc/perf/core-book3s.c | 3 ++-
arch/x86/events/intel/ds.c | 2 +-
arch/x86/events/perf_event.h | 2 +-
include/linux/perf_event.h | 2 ++
include/uapi/linux/perf_event.h | 4 +++-
kernel/events/core.c | 47 +++++++++++++++++++++++++++++++++++++++++
6 files changed, 56 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 6c2d416..2e3eb74 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2039,7 +2039,8 @@ static void record_and_restart(struct perf_event *event,
unsigned long val,
perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
- if (event->attr.sample_type & PERF_SAMPLE_ADDR)
+ if (event->attr.sample_type &
+ (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
perf_get_data_addr(regs, &data.addr);
if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index a322fed..0516f78 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1175,7 +1175,7 @@ static void setup_pebs_sample_data(struct perf_event
*event,
else
regs->flags &= ~PERF_EFLAGS_EXACT;
- if ((sample_type & PERF_SAMPLE_ADDR) &&
+ if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) &&
x86_pmu.intel_cap.pebs_format >= 1)
data->addr = pebs->dla;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 476aec3..65bb91e 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -91,7 +91,7 @@ struct amd_nb {
(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
- PERF_SAMPLE_TRANSACTION)
+ PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR)
/*
* A debug store configuration.
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b14095b..74fb87e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -944,6 +944,8 @@ struct perf_sample_data {
struct perf_regs regs_intr;
u64 stack_user_size;
+
+ u64 phys_addr;
} ____cacheline_aligned;
/* default value for data source */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 642db5f..cbea02f 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -139,8 +139,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_IDENTIFIER = 1U << 16,
PERF_SAMPLE_TRANSACTION = 1U << 17,
PERF_SAMPLE_REGS_INTR = 1U << 18,
+ PERF_SAMPLE_PHYS_ADDR = 1U << 19,
- PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */
+ PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */
};
/*
@@ -814,6 +815,7 @@ enum perf_event_type {
* { u64 transaction; } &&
PERF_SAMPLE_TRANSACTION
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } &&
PERF_SAMPLE_REGS_INTR
+ * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
* };
*/
PERF_RECORD_SAMPLE = 9,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d704e23..e8d5c5d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1570,6 +1570,9 @@ static void __perf_event_header_size(struct perf_event
*event, u64 sample_type)
if (sample_type & PERF_SAMPLE_TRANSACTION)
size += sizeof(data->txn);
+ if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+ size += sizeof(data->phys_addr);
+
event->header_size = size;
}
@@ -6012,6 +6015,9 @@ void perf_output_sample(struct perf_output_handle *handle,
}
}
+ if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+ perf_output_put(handle, data->phys_addr);
+
if (!event->attr.watermark) {
int wakeup_events = event->attr.wakeup_events;
@@ -6027,6 +6033,38 @@ void perf_output_sample(struct perf_output_handle *handle,
}
}
+static u64 perf_virt_to_phys(u64 virt)
+{
+ u64 phys_addr = 0;
+ struct page *p = NULL;
+
+ if (!virt)
+ return 0;
+
+ if (virt >= TASK_SIZE) {
+ /* If it's vmalloc()d memory, leave phys_addr as 0 */
+ if (virt_addr_valid((void *)(uintptr_t)virt) &&
+ !(virt >= VMALLOC_START && virt < VMALLOC_END))
+ phys_addr = (u64)virt_to_phys((void *)(uintptr_t)virt);
+ } else {
+ /*
+ * Walking the pages tables for user address.
+ * Interrupts are disabled, so it prevents any tear down
+ * of the page tables.
+ * Try IRQ-safe __get_user_pages_fast first.
+ * If failed, leave phys_addr as 0.
+ */
+ if ((current->mm != NULL) &&
+ (__get_user_pages_fast(virt, 1, 0, &p) == 1))
+ phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
+
+ if (p)
+ put_page(p);
+ }
+
+ return phys_addr;
+}
+
void perf_prepare_sample(struct perf_event_header *header,
struct perf_sample_data *data,
struct perf_event *event,
@@ -6145,6 +6183,9 @@ void perf_prepare_sample(struct perf_event_header *header,
header->size += size;
}
+
+ if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+ data->phys_addr = perf_virt_to_phys(data->addr);
}
static void __always_inline
@@ -9892,6 +9933,12 @@ SYSCALL_DEFINE5(perf_event_open,
return -EINVAL;
}
+ /* Only privileged users can get kernel addresses */
+ if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) &&
+ perf_paranoid_kernel() &&
+ !capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
if (!attr.sample_max_stack)
attr.sample_max_stack = sysctl_perf_event_max_stack;