[for-next][PATCH 14/17] tracing/uprobes: Fetch args before reserving a ring buffer
From: Namhyung Kim Fetching from user space should be done in a non-atomic context. So use a per-cpu buffer and copy its content to the ring buffer atomically. Note that we can migrate during accessing user memory thus use a per-cpu mutex to protect concurrent accesses. This is needed since we'll be able to fetch args from an user memory which can be swapped out. Before that uprobes could fetch args from registers only which saved in a kernel space. While at it, use __get_data_size() and store_trace_args() to reduce code duplication. And add struct uprobe_cpu_buffer and its helpers as suggested by Oleg. Reviewed-by: Masami Hiramatsu Acked-by: Oleg Nesterov Cc: Srikar Dronamraju Cc: zhangwei(Jovi) Cc: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim --- kernel/trace/trace_uprobe.c | 146 +++- 1 file changed, 132 insertions(+), 14 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 8bfd29a..794e8bc 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -652,21 +652,117 @@ static const struct file_operations uprobe_profile_ops = { .release= seq_release, }; +struct uprobe_cpu_buffer { + struct mutex mutex; + void *buf; +}; +static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer; +static int uprobe_buffer_refcnt; + +static int uprobe_buffer_init(void) +{ + int cpu, err_cpu; + + uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer); + if (uprobe_cpu_buffer == NULL) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + struct page *p = alloc_pages_node(cpu_to_node(cpu), + GFP_KERNEL, 0); + if (p == NULL) { + err_cpu = cpu; + goto err; + } + per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf = page_address(p); + mutex_init(_cpu_ptr(uprobe_cpu_buffer, cpu)->mutex); + } + + return 0; + +err: + for_each_possible_cpu(cpu) { + if (cpu == err_cpu) + break; + free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf); + } + + free_percpu(uprobe_cpu_buffer); + return -ENOMEM; +} + +static int uprobe_buffer_enable(void) +{ + int ret = 0; + + BUG_ON(!mutex_is_locked(_mutex)); + + if (uprobe_buffer_refcnt++ == 0) { + ret = uprobe_buffer_init(); + if (ret < 0) + uprobe_buffer_refcnt--; + } + + return ret; +} + +static void uprobe_buffer_disable(void) +{ + BUG_ON(!mutex_is_locked(_mutex)); + + if (--uprobe_buffer_refcnt == 0) { + free_percpu(uprobe_cpu_buffer); + uprobe_cpu_buffer = NULL; + } +} + +static struct uprobe_cpu_buffer *uprobe_buffer_get(void) +{ + struct uprobe_cpu_buffer *ucb; + int cpu; + + cpu = raw_smp_processor_id(); + ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu); + + /* +* Use per-cpu buffers for fastest access, but we might migrate +* so the mutex makes sure we have sole access to it. +*/ + mutex_lock(>mutex); + + return ucb; +} + +static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) +{ + mutex_unlock(>mutex); +} + static void uprobe_trace_print(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs) { struct uprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; + struct uprobe_cpu_buffer *ucb; void *data; - int size, i; + int size, dsize, esize; struct ftrace_event_call *call = >tp.call; - size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + dsize = __get_data_size(>tp, regs); + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + + if (WARN_ON_ONCE(!uprobe_cpu_buffer || tu->tp.size + dsize > PAGE_SIZE)) + return; + + ucb = uprobe_buffer_get(); + store_trace_args(esize, >tp, regs, ucb->buf, dsize); + + size = esize + tu->tp.size + dsize; event = trace_current_buffer_lock_reserve(, call->event.type, - size + tu->tp.size, 0, 0); + size, 0, 0); if (!event) - return; + goto out; entry = ring_buffer_event_data(event); if (is_ret_probe(tu)) { @@ -678,13 +774,13 @@ static void uprobe_trace_print(struct trace_uprobe *tu, data = DATAOF_TRACE_ENTRY(entry, false); } - for (i = 0; i < tu->tp.nr_args; i++) { - call_fetch(>tp.args[i].fetch, regs, - data + tu->tp.args[i].offset); - } + memcpy(data, ucb->buf, tu->tp.size + dsize); if
[for-next][PATCH 14/17] tracing/uprobes: Fetch args before reserving a ring buffer
From: Namhyung Kim namhyung@lge.com Fetching from user space should be done in a non-atomic context. So use a per-cpu buffer and copy its content to the ring buffer atomically. Note that we can migrate during accessing user memory thus use a per-cpu mutex to protect concurrent accesses. This is needed since we'll be able to fetch args from an user memory which can be swapped out. Before that uprobes could fetch args from registers only which saved in a kernel space. While at it, use __get_data_size() and store_trace_args() to reduce code duplication. And add struct uprobe_cpu_buffer and its helpers as suggested by Oleg. Reviewed-by: Masami Hiramatsu masami.hiramatsu...@hitachi.com Acked-by: Oleg Nesterov o...@redhat.com Cc: Srikar Dronamraju sri...@linux.vnet.ibm.com Cc: zhangwei(Jovi) jovi.zhang...@huawei.com Cc: Arnaldo Carvalho de Melo a...@ghostprotocols.net Signed-off-by: Namhyung Kim namhy...@kernel.org --- kernel/trace/trace_uprobe.c | 146 +++- 1 file changed, 132 insertions(+), 14 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 8bfd29a..794e8bc 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -652,21 +652,117 @@ static const struct file_operations uprobe_profile_ops = { .release= seq_release, }; +struct uprobe_cpu_buffer { + struct mutex mutex; + void *buf; +}; +static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer; +static int uprobe_buffer_refcnt; + +static int uprobe_buffer_init(void) +{ + int cpu, err_cpu; + + uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer); + if (uprobe_cpu_buffer == NULL) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + struct page *p = alloc_pages_node(cpu_to_node(cpu), + GFP_KERNEL, 0); + if (p == NULL) { + err_cpu = cpu; + goto err; + } + per_cpu_ptr(uprobe_cpu_buffer, cpu)-buf = page_address(p); + mutex_init(per_cpu_ptr(uprobe_cpu_buffer, cpu)-mutex); + } + + return 0; + +err: + for_each_possible_cpu(cpu) { + if (cpu == err_cpu) + break; + free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)-buf); + } + + free_percpu(uprobe_cpu_buffer); + return -ENOMEM; +} + +static int uprobe_buffer_enable(void) +{ + int ret = 0; + + BUG_ON(!mutex_is_locked(event_mutex)); + + if (uprobe_buffer_refcnt++ == 0) { + ret = uprobe_buffer_init(); + if (ret 0) + uprobe_buffer_refcnt--; + } + + return ret; +} + +static void uprobe_buffer_disable(void) +{ + BUG_ON(!mutex_is_locked(event_mutex)); + + if (--uprobe_buffer_refcnt == 0) { + free_percpu(uprobe_cpu_buffer); + uprobe_cpu_buffer = NULL; + } +} + +static struct uprobe_cpu_buffer *uprobe_buffer_get(void) +{ + struct uprobe_cpu_buffer *ucb; + int cpu; + + cpu = raw_smp_processor_id(); + ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu); + + /* +* Use per-cpu buffers for fastest access, but we might migrate +* so the mutex makes sure we have sole access to it. +*/ + mutex_lock(ucb-mutex); + + return ucb; +} + +static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) +{ + mutex_unlock(ucb-mutex); +} + static void uprobe_trace_print(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs) { struct uprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; + struct uprobe_cpu_buffer *ucb; void *data; - int size, i; + int size, dsize, esize; struct ftrace_event_call *call = tu-tp.call; - size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + dsize = __get_data_size(tu-tp, regs); + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + + if (WARN_ON_ONCE(!uprobe_cpu_buffer || tu-tp.size + dsize PAGE_SIZE)) + return; + + ucb = uprobe_buffer_get(); + store_trace_args(esize, tu-tp, regs, ucb-buf, dsize); + + size = esize + tu-tp.size + dsize; event = trace_current_buffer_lock_reserve(buffer, call-event.type, - size + tu-tp.size, 0, 0); + size, 0, 0); if (!event) - return; + goto out; entry = ring_buffer_event_data(event); if (is_ret_probe(tu)) { @@ -678,13 +774,13 @@ static void uprobe_trace_print(struct trace_uprobe *tu, data = DATAOF_TRACE_ENTRY(entry, false); } - for (i = 0; i tu-tp.nr_args; i++) { -
[PATCH 14/17] tracing/uprobes: Fetch args before reserving a ring buffer
From: Namhyung Kim Fetching from user space should be done in a non-atomic context. So use a per-cpu buffer and copy its content to the ring buffer atomically. Note that we can migrate during accessing user memory thus use a per-cpu mutex to protect concurrent accesses. This is needed since we'll be able to fetch args from an user memory which can be swapped out. Before that uprobes could fetch args from registers only which saved in a kernel space. While at it, use __get_data_size() and store_trace_args() to reduce code duplication. And add struct uprobe_cpu_buffer and its helpers as suggested by Oleg. Reviewed-by: Masami Hiramatsu Cc: Srikar Dronamraju Cc: Oleg Nesterov Cc: zhangwei(Jovi) Cc: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim --- kernel/trace/trace_uprobe.c | 146 +++- 1 file changed, 132 insertions(+), 14 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 39dfc9a01a4a..6ed319be5a84 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -652,21 +652,117 @@ static const struct file_operations uprobe_profile_ops = { .release= seq_release, }; +struct uprobe_cpu_buffer { + struct mutex mutex; + void *buf; +}; +static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer; +static int uprobe_buffer_refcnt; + +static int uprobe_buffer_init(void) +{ + int cpu, err_cpu; + + uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer); + if (uprobe_cpu_buffer == NULL) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + struct page *p = alloc_pages_node(cpu_to_node(cpu), + GFP_KERNEL, 0); + if (p == NULL) { + err_cpu = cpu; + goto err; + } + per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf = page_address(p); + mutex_init(_cpu_ptr(uprobe_cpu_buffer, cpu)->mutex); + } + + return 0; + +err: + for_each_possible_cpu(cpu) { + if (cpu == err_cpu) + break; + free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf); + } + + free_percpu(uprobe_cpu_buffer); + return -ENOMEM; +} + +static int uprobe_buffer_enable(void) +{ + int ret = 0; + + BUG_ON(!mutex_is_locked(_mutex)); + + if (uprobe_buffer_refcnt++ == 0) { + ret = uprobe_buffer_init(); + if (ret < 0) + uprobe_buffer_refcnt--; + } + + return ret; +} + +static void uprobe_buffer_disable(void) +{ + BUG_ON(!mutex_is_locked(_mutex)); + + if (--uprobe_buffer_refcnt == 0) { + free_percpu(uprobe_cpu_buffer); + uprobe_cpu_buffer = NULL; + } +} + +static struct uprobe_cpu_buffer *uprobe_buffer_get(void) +{ + struct uprobe_cpu_buffer *ucb; + int cpu; + + cpu = raw_smp_processor_id(); + ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu); + + /* +* Use per-cpu buffers for fastest access, but we might migrate +* so the mutex makes sure we have sole access to it. +*/ + mutex_lock(>mutex); + + return ucb; +} + +static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) +{ + mutex_unlock(>mutex); +} + static void uprobe_trace_print(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs) { struct uprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; + struct uprobe_cpu_buffer *ucb; void *data; - int size, i; + int size, dsize, esize; struct ftrace_event_call *call = >tp.call; - size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + dsize = __get_data_size(>tp, regs); + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + + if (WARN_ON_ONCE(!uprobe_cpu_buffer || tu->tp.size + dsize > PAGE_SIZE)) + return; + + ucb = uprobe_buffer_get(); + store_trace_args(esize, >tp, regs, ucb->buf, dsize); + + size = esize + tu->tp.size + dsize; event = trace_current_buffer_lock_reserve(, call->event.type, - size + tu->tp.size, 0, 0); + size, 0, 0); if (!event) - return; + goto out; entry = ring_buffer_event_data(event); if (is_ret_probe(tu)) { @@ -678,13 +774,13 @@ static void uprobe_trace_print(struct trace_uprobe *tu, data = DATAOF_TRACE_ENTRY(entry, false); } - for (i = 0; i < tu->tp.nr_args; i++) { - call_fetch(>tp.args[i].fetch, regs, - data + tu->tp.args[i].offset); - } + memcpy(data, ucb->buf, tu->tp.size + dsize); if
[PATCH 14/17] tracing/uprobes: Fetch args before reserving a ring buffer
From: Namhyung Kim namhyung@lge.com Fetching from user space should be done in a non-atomic context. So use a per-cpu buffer and copy its content to the ring buffer atomically. Note that we can migrate during accessing user memory thus use a per-cpu mutex to protect concurrent accesses. This is needed since we'll be able to fetch args from an user memory which can be swapped out. Before that uprobes could fetch args from registers only which saved in a kernel space. While at it, use __get_data_size() and store_trace_args() to reduce code duplication. And add struct uprobe_cpu_buffer and its helpers as suggested by Oleg. Reviewed-by: Masami Hiramatsu masami.hiramatsu...@hitachi.com Cc: Srikar Dronamraju sri...@linux.vnet.ibm.com Cc: Oleg Nesterov o...@redhat.com Cc: zhangwei(Jovi) jovi.zhang...@huawei.com Cc: Arnaldo Carvalho de Melo a...@ghostprotocols.net Signed-off-by: Namhyung Kim namhy...@kernel.org --- kernel/trace/trace_uprobe.c | 146 +++- 1 file changed, 132 insertions(+), 14 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 39dfc9a01a4a..6ed319be5a84 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -652,21 +652,117 @@ static const struct file_operations uprobe_profile_ops = { .release= seq_release, }; +struct uprobe_cpu_buffer { + struct mutex mutex; + void *buf; +}; +static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer; +static int uprobe_buffer_refcnt; + +static int uprobe_buffer_init(void) +{ + int cpu, err_cpu; + + uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer); + if (uprobe_cpu_buffer == NULL) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + struct page *p = alloc_pages_node(cpu_to_node(cpu), + GFP_KERNEL, 0); + if (p == NULL) { + err_cpu = cpu; + goto err; + } + per_cpu_ptr(uprobe_cpu_buffer, cpu)-buf = page_address(p); + mutex_init(per_cpu_ptr(uprobe_cpu_buffer, cpu)-mutex); + } + + return 0; + +err: + for_each_possible_cpu(cpu) { + if (cpu == err_cpu) + break; + free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)-buf); + } + + free_percpu(uprobe_cpu_buffer); + return -ENOMEM; +} + +static int uprobe_buffer_enable(void) +{ + int ret = 0; + + BUG_ON(!mutex_is_locked(event_mutex)); + + if (uprobe_buffer_refcnt++ == 0) { + ret = uprobe_buffer_init(); + if (ret 0) + uprobe_buffer_refcnt--; + } + + return ret; +} + +static void uprobe_buffer_disable(void) +{ + BUG_ON(!mutex_is_locked(event_mutex)); + + if (--uprobe_buffer_refcnt == 0) { + free_percpu(uprobe_cpu_buffer); + uprobe_cpu_buffer = NULL; + } +} + +static struct uprobe_cpu_buffer *uprobe_buffer_get(void) +{ + struct uprobe_cpu_buffer *ucb; + int cpu; + + cpu = raw_smp_processor_id(); + ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu); + + /* +* Use per-cpu buffers for fastest access, but we might migrate +* so the mutex makes sure we have sole access to it. +*/ + mutex_lock(ucb-mutex); + + return ucb; +} + +static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) +{ + mutex_unlock(ucb-mutex); +} + static void uprobe_trace_print(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs) { struct uprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; + struct uprobe_cpu_buffer *ucb; void *data; - int size, i; + int size, dsize, esize; struct ftrace_event_call *call = tu-tp.call; - size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + dsize = __get_data_size(tu-tp, regs); + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + + if (WARN_ON_ONCE(!uprobe_cpu_buffer || tu-tp.size + dsize PAGE_SIZE)) + return; + + ucb = uprobe_buffer_get(); + store_trace_args(esize, tu-tp, regs, ucb-buf, dsize); + + size = esize + tu-tp.size + dsize; event = trace_current_buffer_lock_reserve(buffer, call-event.type, - size + tu-tp.size, 0, 0); + size, 0, 0); if (!event) - return; + goto out; entry = ring_buffer_event_data(event); if (is_ret_probe(tu)) { @@ -678,13 +774,13 @@ static void uprobe_trace_print(struct trace_uprobe *tu, data = DATAOF_TRACE_ENTRY(entry, false); } - for (i = 0; i tu-tp.nr_args; i++) { -
[PATCH 14/17] tracing/uprobes: Fetch args before reserving a ring buffer
From: Namhyung Kim Fetching from user space should be done in a non-atomic context. So use a per-cpu buffer and copy its content to the ring buffer atomically. Note that we can migrate during accessing user memory thus use a per-cpu mutex to protect concurrent accesses. This is needed since we'll be able to fetch args from an user memory which can be swapped out. Before that uprobes could fetch args from registers only which saved in a kernel space. While at it, use __get_data_size() and store_trace_args() to reduce code duplication. And add struct uprobe_cpu_buffer and its helpers as suggested by Oleg. Reviewed-by: Masami Hiramatsu Cc: Srikar Dronamraju Cc: Oleg Nesterov Cc: zhangwei(Jovi) Cc: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim --- kernel/trace/trace_uprobe.c | 146 +++- 1 file changed, 132 insertions(+), 14 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index d407a556aa55..f86a6a711de9 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -650,21 +650,117 @@ static const struct file_operations uprobe_profile_ops = { .release= seq_release, }; +struct uprobe_cpu_buffer { + struct mutex mutex; + void *buf; +}; +static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer; +static int uprobe_buffer_refcnt; + +static int uprobe_buffer_init(void) +{ + int cpu, err_cpu; + + uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer); + if (uprobe_cpu_buffer == NULL) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + struct page *p = alloc_pages_node(cpu_to_node(cpu), + GFP_KERNEL, 0); + if (p == NULL) { + err_cpu = cpu; + goto err; + } + per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf = page_address(p); + mutex_init(_cpu_ptr(uprobe_cpu_buffer, cpu)->mutex); + } + + return 0; + +err: + for_each_possible_cpu(cpu) { + if (cpu == err_cpu) + break; + free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf); + } + + free_percpu(uprobe_cpu_buffer); + return -ENOMEM; +} + +static int uprobe_buffer_enable(void) +{ + int ret = 0; + + BUG_ON(!mutex_is_locked(_mutex)); + + if (uprobe_buffer_refcnt++ == 0) { + ret = uprobe_buffer_init(); + if (ret < 0) + uprobe_buffer_refcnt--; + } + + return ret; +} + +static void uprobe_buffer_disable(void) +{ + BUG_ON(!mutex_is_locked(_mutex)); + + if (--uprobe_buffer_refcnt == 0) { + free_percpu(uprobe_cpu_buffer); + uprobe_cpu_buffer = NULL; + } +} + +static struct uprobe_cpu_buffer *uprobe_buffer_get(void) +{ + struct uprobe_cpu_buffer *ucb; + int cpu; + + cpu = raw_smp_processor_id(); + ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu); + + /* +* Use per-cpu buffers for fastest access, but we might migrate +* so the mutex makes sure we have sole access to it. +*/ + mutex_lock(>mutex); + + return ucb; +} + +static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) +{ + mutex_unlock(>mutex); +} + static void uprobe_trace_print(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs) { struct uprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; + struct uprobe_cpu_buffer *ucb; void *data; - int size, i; + int size, dsize, esize; struct ftrace_event_call *call = >tp.call; - size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + dsize = __get_data_size(>tp, regs); + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + + if (WARN_ON_ONCE(!uprobe_cpu_buffer || tu->tp.size + dsize > PAGE_SIZE)) + return; + + ucb = uprobe_buffer_get(); + store_trace_args(esize, >tp, regs, ucb->buf, dsize); + + size = esize + tu->tp.size + dsize; event = trace_current_buffer_lock_reserve(, call->event.type, - size + tu->tp.size, 0, 0); + size, 0, 0); if (!event) - return; + goto out; entry = ring_buffer_event_data(event); if (is_ret_probe(tu)) { @@ -676,13 +772,13 @@ static void uprobe_trace_print(struct trace_uprobe *tu, data = DATAOF_TRACE_ENTRY(entry, false); } - for (i = 0; i < tu->tp.nr_args; i++) { - call_fetch(>tp.args[i].fetch, regs, - data + tu->tp.args[i].offset); - } + memcpy(data, ucb->buf, tu->tp.size + dsize); if
[PATCH 14/17] tracing/uprobes: Fetch args before reserving a ring buffer
From: Namhyung Kim namhyung@lge.com Fetching from user space should be done in a non-atomic context. So use a per-cpu buffer and copy its content to the ring buffer atomically. Note that we can migrate during accessing user memory thus use a per-cpu mutex to protect concurrent accesses. This is needed since we'll be able to fetch args from an user memory which can be swapped out. Before that uprobes could fetch args from registers only which saved in a kernel space. While at it, use __get_data_size() and store_trace_args() to reduce code duplication. And add struct uprobe_cpu_buffer and its helpers as suggested by Oleg. Reviewed-by: Masami Hiramatsu masami.hiramatsu...@hitachi.com Cc: Srikar Dronamraju sri...@linux.vnet.ibm.com Cc: Oleg Nesterov o...@redhat.com Cc: zhangwei(Jovi) jovi.zhang...@huawei.com Cc: Arnaldo Carvalho de Melo a...@ghostprotocols.net Signed-off-by: Namhyung Kim namhy...@kernel.org --- kernel/trace/trace_uprobe.c | 146 +++- 1 file changed, 132 insertions(+), 14 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index d407a556aa55..f86a6a711de9 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -650,21 +650,117 @@ static const struct file_operations uprobe_profile_ops = { .release= seq_release, }; +struct uprobe_cpu_buffer { + struct mutex mutex; + void *buf; +}; +static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer; +static int uprobe_buffer_refcnt; + +static int uprobe_buffer_init(void) +{ + int cpu, err_cpu; + + uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer); + if (uprobe_cpu_buffer == NULL) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + struct page *p = alloc_pages_node(cpu_to_node(cpu), + GFP_KERNEL, 0); + if (p == NULL) { + err_cpu = cpu; + goto err; + } + per_cpu_ptr(uprobe_cpu_buffer, cpu)-buf = page_address(p); + mutex_init(per_cpu_ptr(uprobe_cpu_buffer, cpu)-mutex); + } + + return 0; + +err: + for_each_possible_cpu(cpu) { + if (cpu == err_cpu) + break; + free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)-buf); + } + + free_percpu(uprobe_cpu_buffer); + return -ENOMEM; +} + +static int uprobe_buffer_enable(void) +{ + int ret = 0; + + BUG_ON(!mutex_is_locked(event_mutex)); + + if (uprobe_buffer_refcnt++ == 0) { + ret = uprobe_buffer_init(); + if (ret 0) + uprobe_buffer_refcnt--; + } + + return ret; +} + +static void uprobe_buffer_disable(void) +{ + BUG_ON(!mutex_is_locked(event_mutex)); + + if (--uprobe_buffer_refcnt == 0) { + free_percpu(uprobe_cpu_buffer); + uprobe_cpu_buffer = NULL; + } +} + +static struct uprobe_cpu_buffer *uprobe_buffer_get(void) +{ + struct uprobe_cpu_buffer *ucb; + int cpu; + + cpu = raw_smp_processor_id(); + ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu); + + /* +* Use per-cpu buffers for fastest access, but we might migrate +* so the mutex makes sure we have sole access to it. +*/ + mutex_lock(ucb-mutex); + + return ucb; +} + +static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) +{ + mutex_unlock(ucb-mutex); +} + static void uprobe_trace_print(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs) { struct uprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; + struct uprobe_cpu_buffer *ucb; void *data; - int size, i; + int size, dsize, esize; struct ftrace_event_call *call = tu-tp.call; - size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + dsize = __get_data_size(tu-tp, regs); + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + + if (WARN_ON_ONCE(!uprobe_cpu_buffer || tu-tp.size + dsize PAGE_SIZE)) + return; + + ucb = uprobe_buffer_get(); + store_trace_args(esize, tu-tp, regs, ucb-buf, dsize); + + size = esize + tu-tp.size + dsize; event = trace_current_buffer_lock_reserve(buffer, call-event.type, - size + tu-tp.size, 0, 0); + size, 0, 0); if (!event) - return; + goto out; entry = ring_buffer_event_data(event); if (is_ret_probe(tu)) { @@ -676,13 +772,13 @@ static void uprobe_trace_print(struct trace_uprobe *tu, data = DATAOF_TRACE_ENTRY(entry, false); } - for (i = 0; i tu-tp.nr_args; i++) { -
[PATCH 14/17] tracing/uprobes: Fetch args before reserving a ring buffer
From: Namhyung Kim Fetching from user space should be done in a non-atomic context. So use a per-cpu buffer and copy its content to the ring buffer atomically. Note that we can migrate during accessing user memory thus use a per-cpu mutex to protect concurrent accesses. This is needed since we'll be able to fetch args from an user memory which can be swapped out. Before that uprobes could fetch args from registers only which saved in a kernel space. While at it, use __get_data_size() and store_trace_args() to reduce code duplication. And add struct uprobe_cpu_buffer and its helpers as suggested by Oleg. Reviewed-by: Masami Hiramatsu Cc: Srikar Dronamraju Cc: Oleg Nesterov Cc: zhangwei(Jovi) Cc: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim --- kernel/trace/trace_uprobe.c | 146 +++- 1 file changed, 132 insertions(+), 14 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 5a6eb5686617..c037477d1c1a 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -650,21 +650,117 @@ static const struct file_operations uprobe_profile_ops = { .release= seq_release, }; +struct uprobe_cpu_buffer { + struct mutex mutex; + void *buf; +}; +static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer; +static int uprobe_buffer_refcnt; + +static int uprobe_buffer_init(void) +{ + int cpu, err_cpu; + + uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer); + if (uprobe_cpu_buffer == NULL) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + struct page *p = alloc_pages_node(cpu_to_node(cpu), + GFP_KERNEL, 0); + if (p == NULL) { + err_cpu = cpu; + goto err; + } + per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf = page_address(p); + mutex_init(_cpu_ptr(uprobe_cpu_buffer, cpu)->mutex); + } + + return 0; + +err: + for_each_possible_cpu(cpu) { + if (cpu == err_cpu) + break; + free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf); + } + + free_percpu(uprobe_cpu_buffer); + return -ENOMEM; +} + +static int uprobe_buffer_enable(void) +{ + int ret = 0; + + BUG_ON(!mutex_is_locked(_mutex)); + + if (uprobe_buffer_refcnt++ == 0) { + ret = uprobe_buffer_init(); + if (ret < 0) + uprobe_buffer_refcnt--; + } + + return ret; +} + +static void uprobe_buffer_disable(void) +{ + BUG_ON(!mutex_is_locked(_mutex)); + + if (--uprobe_buffer_refcnt == 0) { + free_percpu(uprobe_cpu_buffer); + uprobe_cpu_buffer = NULL; + } +} + +static struct uprobe_cpu_buffer *uprobe_buffer_get(void) +{ + struct uprobe_cpu_buffer *ucb; + int cpu; + + cpu = raw_smp_processor_id(); + ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu); + + /* +* Use per-cpu buffers for fastest access, but we might migrate +* so the mutex makes sure we have sole access to it. +*/ + mutex_lock(>mutex); + + return ucb; +} + +static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) +{ + mutex_unlock(>mutex); +} + static void uprobe_trace_print(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs) { struct uprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; + struct uprobe_cpu_buffer *ucb; void *data; - int size, i; + int size, dsize, esize; struct ftrace_event_call *call = >p.call; - size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + dsize = __get_data_size(>p, regs); + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + + if (WARN_ON_ONCE(!uprobe_cpu_buffer || tu->p.size + dsize > PAGE_SIZE)) + return; + + ucb = uprobe_buffer_get(); + store_trace_args(esize, >p, regs, ucb->buf, dsize); + + size = esize + tu->p.size + dsize; event = trace_current_buffer_lock_reserve(, call->event.type, - size + tu->p.size, 0, 0); + size, 0, 0); if (!event) - return; + goto out; entry = ring_buffer_event_data(event); if (is_ret_probe(tu)) { @@ -676,13 +772,13 @@ static void uprobe_trace_print(struct trace_uprobe *tu, data = DATAOF_TRACE_ENTRY(entry, false); } - for (i = 0; i < tu->p.nr_args; i++) { - call_fetch(>p.args[i].fetch, regs, - data + tu->p.args[i].offset); - } + memcpy(data, ucb->buf, tu->p.size + dsize); if
[PATCH 14/17] tracing/uprobes: Fetch args before reserving a ring buffer
From: Namhyung Kim namhyung@lge.com Fetching from user space should be done in a non-atomic context. So use a per-cpu buffer and copy its content to the ring buffer atomically. Note that we can migrate during accessing user memory thus use a per-cpu mutex to protect concurrent accesses. This is needed since we'll be able to fetch args from an user memory which can be swapped out. Before that uprobes could fetch args from registers only which saved in a kernel space. While at it, use __get_data_size() and store_trace_args() to reduce code duplication. And add struct uprobe_cpu_buffer and its helpers as suggested by Oleg. Reviewed-by: Masami Hiramatsu masami.hiramatsu...@hitachi.com Cc: Srikar Dronamraju sri...@linux.vnet.ibm.com Cc: Oleg Nesterov o...@redhat.com Cc: zhangwei(Jovi) jovi.zhang...@huawei.com Cc: Arnaldo Carvalho de Melo a...@ghostprotocols.net Signed-off-by: Namhyung Kim namhy...@kernel.org --- kernel/trace/trace_uprobe.c | 146 +++- 1 file changed, 132 insertions(+), 14 deletions(-) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 5a6eb5686617..c037477d1c1a 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -650,21 +650,117 @@ static const struct file_operations uprobe_profile_ops = { .release= seq_release, }; +struct uprobe_cpu_buffer { + struct mutex mutex; + void *buf; +}; +static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer; +static int uprobe_buffer_refcnt; + +static int uprobe_buffer_init(void) +{ + int cpu, err_cpu; + + uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer); + if (uprobe_cpu_buffer == NULL) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + struct page *p = alloc_pages_node(cpu_to_node(cpu), + GFP_KERNEL, 0); + if (p == NULL) { + err_cpu = cpu; + goto err; + } + per_cpu_ptr(uprobe_cpu_buffer, cpu)-buf = page_address(p); + mutex_init(per_cpu_ptr(uprobe_cpu_buffer, cpu)-mutex); + } + + return 0; + +err: + for_each_possible_cpu(cpu) { + if (cpu == err_cpu) + break; + free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)-buf); + } + + free_percpu(uprobe_cpu_buffer); + return -ENOMEM; +} + +static int uprobe_buffer_enable(void) +{ + int ret = 0; + + BUG_ON(!mutex_is_locked(event_mutex)); + + if (uprobe_buffer_refcnt++ == 0) { + ret = uprobe_buffer_init(); + if (ret 0) + uprobe_buffer_refcnt--; + } + + return ret; +} + +static void uprobe_buffer_disable(void) +{ + BUG_ON(!mutex_is_locked(event_mutex)); + + if (--uprobe_buffer_refcnt == 0) { + free_percpu(uprobe_cpu_buffer); + uprobe_cpu_buffer = NULL; + } +} + +static struct uprobe_cpu_buffer *uprobe_buffer_get(void) +{ + struct uprobe_cpu_buffer *ucb; + int cpu; + + cpu = raw_smp_processor_id(); + ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu); + + /* +* Use per-cpu buffers for fastest access, but we might migrate +* so the mutex makes sure we have sole access to it. +*/ + mutex_lock(ucb-mutex); + + return ucb; +} + +static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) +{ + mutex_unlock(ucb-mutex); +} + static void uprobe_trace_print(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs) { struct uprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; + struct uprobe_cpu_buffer *ucb; void *data; - int size, i; + int size, dsize, esize; struct ftrace_event_call *call = tu-p.call; - size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + dsize = __get_data_size(tu-p, regs); + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + + if (WARN_ON_ONCE(!uprobe_cpu_buffer || tu-p.size + dsize PAGE_SIZE)) + return; + + ucb = uprobe_buffer_get(); + store_trace_args(esize, tu-p, regs, ucb-buf, dsize); + + size = esize + tu-p.size + dsize; event = trace_current_buffer_lock_reserve(buffer, call-event.type, - size + tu-p.size, 0, 0); + size, 0, 0); if (!event) - return; + goto out; entry = ring_buffer_event_data(event); if (is_ret_probe(tu)) { @@ -676,13 +772,13 @@ static void uprobe_trace_print(struct trace_uprobe *tu, data = DATAOF_TRACE_ENTRY(entry, false); } - for (i = 0; i tu-p.nr_args; i++) { -