On 29/08/19 2:46 PM, Peter Zijlstra wrote:
> On Thu, Aug 29, 2019 at 12:40:56PM +0300, Adrian Hunter wrote:
>> Can you expand on "and ensure the poke_handler preserves the existing
>> control flow"?  Whatever the INT3-handler does will be traced normally so
>> long as it does not itself execute self-modified code.
> 
> My thinking was that the code shouldn't change semantics before emitting
> the RECORD_TEXT_POKE; but you're right in that that doesn't seem to
> matter much.
> 
> Either we run the old code or INT3 does 'something'.  Then we get
> RECORD_TEXT_POKE and finish the poke.  Which tells that the moment INT3
> stops the new text is in place.
> 
> I suppose that works too, and requires less changes.


What about this?


diff --git a/arch/x86/include/asm/text-patching.h 
b/arch/x86/include/asm/text-patching.h
index 70c09967a999..00aa9bef2b9d 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -30,6 +30,7 @@ struct text_poke_loc {
        void *addr;
        size_t len;
        const char opcode[POKE_MAX_OPCODE_SIZE];
+       char old_opcode[POKE_MAX_OPCODE_SIZE];
 };
 
 extern void text_poke_early(void *addr, const void *opcode, size_t len);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ccd32013c47a..c781bbbbbafd 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -3,6 +3,7 @@
 
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/perf_event.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/stringify.h>
@@ -1045,8 +1046,10 @@ void text_poke_bp_batch(struct text_poke_loc *tp, 
unsigned int nr_entries)
        /*
         * First step: add a int3 trap to the address that will be patched.
         */
-       for (i = 0; i < nr_entries; i++)
+       for (i = 0; i < nr_entries; i++) {
+               memcpy(tp[i].old_opcode, tp[i].addr, tp[i].len);
                text_poke(tp[i].addr, &int3, sizeof(int3));
+       }
 
        on_each_cpu(do_sync_core, NULL, 1);
 
@@ -1071,6 +1074,11 @@ void text_poke_bp_batch(struct text_poke_loc *tp, 
unsigned int nr_entries)
                on_each_cpu(do_sync_core, NULL, 1);
        }
 
+       for (i = 0; i < nr_entries; i++) {
+               perf_event_text_poke((unsigned long)tp[i].addr,
+                                    tp[i].old_opcode, tp[i].opcode, tp[i].len);
+       }
+
        /*
         * Third step: replace the first byte (int3) by the first byte of
         * replacing opcode.
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 61448c19a132..f4c6095d2110 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1183,6 +1183,8 @@ extern void perf_event_exec(void);
 extern void perf_event_comm(struct task_struct *tsk, bool exec);
 extern void perf_event_namespaces(struct task_struct *tsk);
 extern void perf_event_fork(struct task_struct *tsk);
+extern void perf_event_text_poke(unsigned long addr, const void *old_bytes,
+                                const void *new_bytes, size_t len);
 
 /* Callchains */
 DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
@@ -1406,6 +1408,10 @@ static inline void perf_event_exec(void)                 
        { }
 static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
 static inline void perf_event_namespaces(struct task_struct *tsk)      { }
 static inline void perf_event_fork(struct task_struct *tsk)            { }
+static inline void perf_event_text_poke(unsigned long addr,
+                                       const void *old_bytes,
+                                       const void *new_bytes,
+                                       size_t len)                     { }
 static inline void perf_event_init(void)                               { }
 static inline int  perf_swevent_get_recursion_context(void)            { 
return -1; }
 static inline void perf_swevent_put_recursion_context(int rctx)                
{ }
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index bb7b271397a6..6396d4c0d2f9 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -375,7 +375,8 @@ struct perf_event_attr {
                                ksymbol        :  1, /* include ksymbol events 
*/
                                bpf_event      :  1, /* include bpf events */
                                aux_output     :  1, /* generate AUX records 
instead of events */
-                               __reserved_1   : 32;
+                               text_poke      :  1, /* include text poke 
events */
+                               __reserved_1   : 31;
 
        union {
                __u32           wakeup_events;    /* wakeup every n events */
@@ -1000,6 +1001,22 @@ enum perf_event_type {
         */
        PERF_RECORD_BPF_EVENT                   = 18,
 
+       /*
+        * Records changes to kernel text i.e. self-modified code.
+        * 'len' is the number of old bytes, which is the same as the number
+        * of new bytes. 'bytes' contains the old bytes followed immediately
+        * by the new bytes.
+        *
+        * struct {
+        *      struct perf_event_header        header;
+        *      u64                             addr;
+        *      u16                             len;
+        *      u8                              bytes[];
+        *      struct sample_id                sample_id;
+        * };
+        */
+       PERF_RECORD_TEXT_POKE                   = 19,
+
        PERF_RECORD_MAX,                        /* non-ABI */
 };
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 811bb333c986..43c0d3d232dc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -386,6 +386,7 @@ static atomic_t nr_freq_events __read_mostly;
 static atomic_t nr_switch_events __read_mostly;
 static atomic_t nr_ksymbol_events __read_mostly;
 static atomic_t nr_bpf_events __read_mostly;
+static atomic_t nr_text_poke_events __read_mostly;
 
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
@@ -4339,7 +4340,7 @@ static bool is_sb_event(struct perf_event *event)
        if (attr->mmap || attr->mmap_data || attr->mmap2 ||
            attr->comm || attr->comm_exec ||
            attr->task || attr->ksymbol ||
-           attr->context_switch ||
+           attr->context_switch || attr->text_poke ||
            attr->bpf_event)
                return true;
        return false;
@@ -4413,6 +4414,8 @@ static void unaccount_event(struct perf_event *event)
                atomic_dec(&nr_ksymbol_events);
        if (event->attr.bpf_event)
                atomic_dec(&nr_bpf_events);
+       if (event->attr.text_poke)
+               atomic_dec(&nr_text_poke_events);
 
        if (dec) {
                if (!atomic_add_unless(&perf_sched_count, -1, 1))
@@ -8045,6 +8048,85 @@ void perf_event_bpf_event(struct bpf_prog *prog,
        perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
 }
 
+struct perf_text_poke_event {
+       const void              *old_bytes;
+       const void              *new_bytes;
+       size_t                  pad;
+       u16                     len;
+
+       struct {
+               struct perf_event_header        header;
+
+               u64                             addr;
+       } event_id;
+};
+
+static int perf_event_text_poke_match(struct perf_event *event)
+{
+       return event->attr.text_poke;
+}
+
+static void perf_event_text_poke_output(struct perf_event *event, void *data)
+{
+       struct perf_text_poke_event *text_poke_event = data;
+       struct perf_output_handle handle;
+       struct perf_sample_data sample;
+       u64 padding = 0;
+       int ret;
+
+       if (!perf_event_text_poke_match(event))
+               return;
+
+       perf_event_header__init_id(&text_poke_event->event_id.header, &sample, 
event);
+
+       ret = perf_output_begin(&handle, event, 
text_poke_event->event_id.header.size);
+       if (ret)
+               return;
+
+       perf_output_put(&handle, text_poke_event->event_id);
+       perf_output_put(&handle, text_poke_event->len);
+
+       __output_copy(&handle, text_poke_event->old_bytes, 
text_poke_event->len);
+       __output_copy(&handle, text_poke_event->new_bytes, 
text_poke_event->len);
+
+       if (text_poke_event->pad)
+               __output_copy(&handle, &padding, text_poke_event->pad);
+
+       perf_event__output_id_sample(event, &handle, &sample);
+
+       perf_output_end(&handle);
+}
+
+void perf_event_text_poke(unsigned long addr, const void *old_bytes,
+                         const void *new_bytes, size_t len)
+{
+       struct perf_text_poke_event text_poke_event;
+       size_t tot, pad;
+
+       if (!atomic_read(&nr_text_poke_events))
+               return;
+
+       tot  = sizeof(text_poke_event.len) + (len << 1);
+       pad  = ALIGN(tot, sizeof(u64)) - tot;
+
+       text_poke_event = (struct perf_text_poke_event){
+               .old_bytes    = old_bytes,
+               .new_bytes    = new_bytes,
+               .pad          = pad,
+               .len          = len,
+               .event_id  = {
+                       .header = {
+                               .type = PERF_RECORD_TEXT_POKE,
+                               .misc = PERF_RECORD_MISC_KERNEL,
+                               .size = sizeof(text_poke_event.event_id) + tot 
+ pad,
+                       },
+                       .addr = addr,
+               },
+       };
+
+       perf_iterate_sb(perf_event_text_poke_output, &text_poke_event, NULL);
+}
+
 void perf_event_itrace_started(struct perf_event *event)
 {
        event->attach_state |= PERF_ATTACH_ITRACE;
@@ -10331,6 +10413,8 @@ static void account_event(struct perf_event *event)
                atomic_inc(&nr_ksymbol_events);
        if (event->attr.bpf_event)
                atomic_inc(&nr_bpf_events);
+       if (event->attr.text_poke)
+               atomic_inc(&nr_text_poke_events);
 
        if (inc) {
                /*
-- 
2.17.1

Reply via email to