[PATCH 05/12] perf trace: Deref sys_enter pointer args with contents from probe:vfs_getname
From: Arnaldo Carvalho de Melo To work like strace and dereference syscall pointer args we need to insert probes (or tracepoints) right after we copy those bytes from userspace. Since we're formatting the syscall args at raw_syscalls:sys_enter time, we need to have a formatter that just stores the position where, later, when we get the probe:vfs_getname, we can insert the pointer contents. Now, if a probe:vfs_getname with this format is in place: # perf probe -l probe:vfs_getname (on getname_flags:72@/home/git/linux/fs/namei.c with pathname) That was, in this case, put in place with: # perf probe 'vfs_getname=getname_flags:72 pathname=filename:string' Added new event: probe:vfs_getname(on getname_flags:72 with pathname=filename:string) You can now use it in all perf tools, such as: perf record -e probe:vfs_getname -aR sleep 1 # Then 'perf trace' will notice that and do the pointer -> contents expansion: # trace -e open touch /tmp/bla 0.165 (0.010 ms): touch/17752 open(filename: /etc/ld.so.cache, flags: CLOEXEC) = 3 0.195 (0.011 ms): touch/17752 open(filename: /lib64/libc.so.6, flags: CLOEXEC) = 3 0.512 (0.012 ms): touch/17752 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) = 3 0.582 (0.012 ms): touch/17752 open(filename: /tmp/bla, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: 438) = 3 # Roughly equivalent to strace's output: # strace -rT -e open touch /tmp/bla 0.00 open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 <0.39> 0.000317 open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 <0.000102> 0.001461 open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3 <0.72> 0.000405 open("/tmp/bla", O_WRONLY|O_CREAT|O_NOCTTY|O_NONBLOCK, 0666) = 3 <0.55> 0.000641 +++ exited with 0 +++ # Now we need to either look for at all syscalls that are marked as pointers and have some well known names ("filename", "pathname", etc) and set the arg formatter to the one used for the "open" syscall in this patch. This implementation works for syscalls with just a string being copied from userspace, for matching syscalls with more than one string being copied via the same probe/trace point (vfs_getname) we need to extend the vfs_getname probe spec to include the pointer too, but there are some problems with that in 'perf probe' or the kernel kprobes code, need to investigate before considering supporting multiple strings per syscall. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-xvuwx6nuj8cf389kf9s2u...@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 76 +- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 12d6fc0227b1..0255dd892662 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -744,6 +744,11 @@ static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, #define SCA_ACCMODE syscall_arg__scnprintf_access_mode +static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, + struct syscall_arg *arg); + +#define SCA_FILENAME syscall_arg__scnprintf_filename + static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg) { @@ -1088,7 +1093,8 @@ static struct syscall_fmt { { .name = "newfstatat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "open", .errmsg = true, - .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ +[1] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "open_by_handle_at", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ [2] = SCA_OPEN_FLAGS, /* flags */ }, }, @@ -1208,6 +1214,11 @@ static size_t fprintf_duration(unsigned long t, FILE *fp) return printed + fprintf(fp, "): "); } +/** + * filename.ptr: The filename char pointer that will be vfs_getname'd + * filename.entry_str_pos: Where to insert the string translated from + * filename.ptr by the vfs_getname tracepoint/kprobe. + */ struct thread_trace { u64 entry_time; u64 exit_time; @@ -1216,6 +1227,10 @@ struct thread_trace { unsigned long pfmaj, pfmin; char *entry_str; doubleruntime_ms; +struct { + unsigned long ptr; + int entry_str_pos; + } filename; struct { int max; char **table; @@ -1418,6 +1433,27 @@ static size_t
[PATCH 05/12] perf trace: Deref sys_enter pointer args with contents from probe:vfs_getname
From: Arnaldo Carvalho de Melo a...@redhat.com To work like strace and dereference syscall pointer args we need to insert probes (or tracepoints) right after we copy those bytes from userspace. Since we're formatting the syscall args at raw_syscalls:sys_enter time, we need to have a formatter that just stores the position where, later, when we get the probe:vfs_getname, we can insert the pointer contents. Now, if a probe:vfs_getname with this format is in place: # perf probe -l probe:vfs_getname (on getname_flags:72@/home/git/linux/fs/namei.c with pathname) That was, in this case, put in place with: # perf probe 'vfs_getname=getname_flags:72 pathname=filename:string' Added new event: probe:vfs_getname(on getname_flags:72 with pathname=filename:string) You can now use it in all perf tools, such as: perf record -e probe:vfs_getname -aR sleep 1 # Then 'perf trace' will notice that and do the pointer - contents expansion: # trace -e open touch /tmp/bla 0.165 (0.010 ms): touch/17752 open(filename: /etc/ld.so.cache, flags: CLOEXEC) = 3 0.195 (0.011 ms): touch/17752 open(filename: /lib64/libc.so.6, flags: CLOEXEC) = 3 0.512 (0.012 ms): touch/17752 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) = 3 0.582 (0.012 ms): touch/17752 open(filename: /tmp/bla, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: 438) = 3 # Roughly equivalent to strace's output: # strace -rT -e open touch /tmp/bla 0.00 open(/etc/ld.so.cache, O_RDONLY|O_CLOEXEC) = 3 0.39 0.000317 open(/lib64/libc.so.6, O_RDONLY|O_CLOEXEC) = 3 0.000102 0.001461 open(/usr/lib/locale/locale-archive, O_RDONLY|O_CLOEXEC) = 3 0.72 0.000405 open(/tmp/bla, O_WRONLY|O_CREAT|O_NOCTTY|O_NONBLOCK, 0666) = 3 0.55 0.000641 +++ exited with 0 +++ # Now we need to either look for at all syscalls that are marked as pointers and have some well known names (filename, pathname, etc) and set the arg formatter to the one used for the open syscall in this patch. This implementation works for syscalls with just a string being copied from userspace, for matching syscalls with more than one string being copied via the same probe/trace point (vfs_getname) we need to extend the vfs_getname probe spec to include the pointer too, but there are some problems with that in 'perf probe' or the kernel kprobes code, need to investigate before considering supporting multiple strings per syscall. Cc: Adrian Hunter adrian.hun...@intel.com Cc: Borislav Petkov b...@suse.de Cc: David Ahern dsah...@gmail.com Cc: Frederic Weisbecker fweis...@gmail.com Cc: Jiri Olsa jo...@redhat.com Cc: Milian Wolff m...@milianw.de Cc: Namhyung Kim namhy...@kernel.org Cc: Stephane Eranian eran...@google.com Link: http://lkml.kernel.org/n/tip-xvuwx6nuj8cf389kf9s2u...@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo a...@redhat.com --- tools/perf/builtin-trace.c | 76 +- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 12d6fc0227b1..0255dd892662 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -744,6 +744,11 @@ static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, #define SCA_ACCMODE syscall_arg__scnprintf_access_mode +static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, + struct syscall_arg *arg); + +#define SCA_FILENAME syscall_arg__scnprintf_filename + static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg) { @@ -1088,7 +1093,8 @@ static struct syscall_fmt { { .name = newfstatat, .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = open, .errmsg = true, - .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ +[1] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = open_by_handle_at, .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ [2] = SCA_OPEN_FLAGS, /* flags */ }, }, @@ -1208,6 +1214,11 @@ static size_t fprintf_duration(unsigned long t, FILE *fp) return printed + fprintf(fp, ): ); } +/** + * filename.ptr: The filename char pointer that will be vfs_getname'd + * filename.entry_str_pos: Where to insert the string translated from + * filename.ptr by the vfs_getname tracepoint/kprobe. + */ struct thread_trace { u64 entry_time; u64 exit_time; @@ -1216,6 +1227,10 @@ struct thread_trace { unsigned long pfmaj, pfmin; char *entry_str; doubleruntime_ms; +struct { + unsigned long ptr; + int entry_str_pos; + }