$ mount -o newinstance -t tracefs tracefs /sys/kernel/tracing
$
$
$ perf probe /lib/x86_64-linux-gnu/libc.so.6 malloc
Added new event:
probe_libc:malloc (on malloc in /lib/x86_64-linux-gnu/libc.so.6)
You can now use it in all perf tools, such as:
perf record -e probe_libc:malloc -aR sleep 1
$
$
$ perf probe --list
probe_libc:malloc (on __libc_malloc in /lib64/libc.so.6)
$
while another container/host has a completely different view:
$ perf probe --list
probe_libc:memset (on __libc_memset in /lib64/libc.so.6)
$
This patch reuses the code that provides support to create new instances
under tracefs instances directory.
Signed-off-by: Hari Bathini <hbath...@linux.vnet.ibm.com>
---
fs/tracefs/inode.c | 171 ++++++++++++++++++++++++++++++++++++++---------
include/linux/tracefs.h | 11 ++-
kernel/trace/trace.c | 52 ++++++++++----
3 files changed, 181 insertions(+), 53 deletions(-)
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 4a0e48f..2d6acda 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -51,9 +51,9 @@ static const struct file_operations tracefs_file_operations =
{
};
static struct tracefs_dir_ops {
- int (*mkdir)(const char *name);
- int (*rmdir)(const char *name);
-} tracefs_ops;
+ int (*mkdir)(int instance_type, void *data);
+ int (*rmdir)(int instance_type, void *data);
+} tracefs_instance_ops;
static char *get_dname(struct dentry *dentry)
{
@@ -85,7 +85,7 @@ static int tracefs_syscall_mkdir(struct inode *inode, struct
dentry *dentry, umo
* mkdir routine to handle races.
*/
inode_unlock(inode);
- ret = tracefs_ops.mkdir(name);
+ ret = tracefs_instance_ops.mkdir(INSTANCE_DIR, name);
inode_lock(inode);
kfree(name);
@@ -112,7 +112,7 @@ static int tracefs_syscall_rmdir(struct inode *inode,
struct dentry *dentry)
inode_unlock(inode);
inode_unlock(dentry->d_inode);
- ret = tracefs_ops.rmdir(name);
+ ret = tracefs_instance_ops.rmdir(INSTANCE_DIR, name);
inode_lock_nested(inode, I_MUTEX_PARENT);
inode_lock(dentry->d_inode);
@@ -142,12 +142,14 @@ struct tracefs_mount_opts {
kuid_t uid;
kgid_t gid;
umode_t mode;
+ int newinstance;
};
enum {
Opt_uid,
Opt_gid,
Opt_mode,
+ Opt_newinstance,
Opt_err
};
@@ -155,14 +157,26 @@ static const match_table_t tokens = {
{Opt_uid, "uid=%u"},
{Opt_gid, "gid=%u"},
{Opt_mode, "mode=%o"},
+ {Opt_newinstance, "newinstance"},
{Opt_err, NULL}
};
struct tracefs_fs_info {
struct tracefs_mount_opts mount_opts;
+ struct super_block *sb;
};
-static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
+static inline struct tracefs_fs_info *TRACEFS_SB(struct super_block *sb)
+{
+ return sb->s_fs_info;
+}
+
+#define PARSE_MOUNT 0
+#define PARSE_REMOUNT 1
+
+static int tracefs_parse_options(char *data,
+ int op,
+ struct tracefs_mount_opts *opts)
{
substring_t args[MAX_OPT_ARGS];
int option;
@@ -173,6 +187,10 @@ static int tracefs_parse_options(char *data, struct
tracefs_mount_opts *opts)
opts->mode = TRACEFS_DEFAULT_MODE;
+ /* newinstance makes sense only on initial mount */
+ if (op == PARSE_MOUNT)
+ opts->newinstance = 0;
+
while ((p = strsep(&data, ",")) != NULL) {
if (!*p)
continue;
@@ -200,6 +218,11 @@ static int tracefs_parse_options(char *data, struct
tracefs_mount_opts *opts)
return -EINVAL;
opts->mode = option & S_IALLUGO;
break;
+ case Opt_newinstance:
+ /* newinstance makes sense only on initial mount */
+ if (op == PARSE_MOUNT)
+ opts->newinstance = 1;
+ break;
/*
* We might like to report bad mount options here;
* but traditionally tracefs has ignored all mount options
@@ -231,7 +254,7 @@ static int tracefs_remount(struct super_block *sb, int
*flags, char *data)
struct tracefs_fs_info *fsi = sb->s_fs_info;
sync_filesystem(sb);
- err = tracefs_parse_options(data, &fsi->mount_opts);
+ err = tracefs_parse_options(data, PARSE_REMOUNT, &fsi->mount_opts);
if (err)
goto fail;
@@ -254,6 +277,8 @@ static int tracefs_show_options(struct seq_file *m, struct dentry *root)
from_kgid_munged(&init_user_ns, opts->gid));
if (opts->mode != TRACEFS_DEFAULT_MODE)
seq_printf(m, ",mode=%o", opts->mode);
+ if (opts->newinstance)
+ seq_puts(m, ",newinstance");
return 0;
}
@@ -264,53 +289,130 @@ static const struct super_operations
tracefs_super_operations = {
.show_options = tracefs_show_options,
};
-static int trace_fill_super(struct super_block *sb, void *data, int silent)
+static void *new_tracefs_fs_info(struct super_block *sb)
{
- static struct tree_descr trace_files[] = {{""}};
struct tracefs_fs_info *fsi;
- int err;
-
- save_mount_options(sb, data);
fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL);
- sb->s_fs_info = fsi;
- if (!fsi) {
- err = -ENOMEM;
- goto fail;
- }
+ if (!fsi)
+ return NULL;
- err = tracefs_parse_options(data, &fsi->mount_opts);
- if (err)
+ fsi->mount_opts.mode = TRACEFS_DEFAULT_MODE;
+ fsi->sb = sb;
+
+ return fsi;
+}
+
+static int trace_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct inode *inode;
+
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
+ sb->s_magic = TRACEFS_MAGIC;
+ sb->s_op = &tracefs_super_operations;
+ sb->s_time_gran = 1;
+
+ sb->s_fs_info = new_tracefs_fs_info(sb);
+ if (!sb->s_fs_info)
goto fail;
- err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files);
- if (err)
+ inode = new_inode(sb);
+ if (!inode)
goto fail;
+ inode->i_ino = 1;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
+ inode->i_op = &simple_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+ set_nlink(inode, 2);
- sb->s_op = &tracefs_super_operations;
+ sb->s_root = d_make_root(inode);
+ if (sb->s_root)
+ return 0;
- tracefs_apply_options(sb);
+ pr_err("get root dentry failed\n");
return 0;
fail:
- kfree(fsi);
- sb->s_fs_info = NULL;
- return err;
+ return -ENOMEM;
+}
+
+static int compare_init_tracefs_sb(struct super_block *s, void *p)
+{
+ if (tracefs_mount)
+ return tracefs_mount->mnt_sb == s;
+ return 0;
}
static struct dentry *trace_mount(struct file_system_type *fs_type,
int flags, const char *dev_name,
void *data)
{
- return mount_single(fs_type, flags, data, trace_fill_super);
+ int err;
+ struct tracefs_mount_opts opts;
+ struct super_block *s;
+
+ err = tracefs_parse_options(data, PARSE_MOUNT, &opts);
+ if (err)
+ return ERR_PTR(err);
+
+ /* Require newinstance for all user namespace mounts to ensure
+ * the mount options are not changed.
+ */
+ if ((current_user_ns() != &init_user_ns) && !opts.newinstance)
+ return ERR_PTR(-EINVAL);
+
+ if (opts.newinstance)
+ s = sget(fs_type, NULL, set_anon_super, flags, NULL);
+ else
+ s = sget(fs_type, compare_init_tracefs_sb, set_anon_super,
+ flags, NULL);
+
+ if (IS_ERR(s))
+ return ERR_CAST(s);
+
+ if (!s->s_root) {
+ err = trace_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
+ if (err)
+ goto out_undo_sget;
+ s->s_flags |= MS_ACTIVE;
+ }
+
+ if (opts.newinstance) {
+ err = tracefs_instance_ops.mkdir(INSTANCE_MNT, s->s_root);
+ if (err)
+ goto out_undo_sget;
+ }
+
+ memcpy(&(TRACEFS_SB(s))->mount_opts, &opts, sizeof(opts));
+
+ tracefs_apply_options(s);
+
+ return dget(s->s_root);
+
+out_undo_sget:
+ deactivate_locked_super(s);
+ return ERR_PTR(err);
+}
+
+static void trace_kill_sb(struct super_block *sb)
+{
+ struct tracefs_fs_info *fsi = TRACEFS_SB(sb);
+
+ if (fsi->mount_opts.newinstance)
+ tracefs_instance_ops.rmdir(INSTANCE_MNT, sb->s_root);
+
+ kfree(fsi);
+ kill_litter_super(sb);
}
static struct file_system_type trace_fs_type = {
.owner = THIS_MODULE,
.name = "tracefs",
.mount = trace_mount,
- .kill_sb = kill_litter_super,
+ .kill_sb = trace_kill_sb,
};
MODULE_ALIAS_FS("tracefs");
@@ -480,22 +582,23 @@ struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
*
* Returns the dentry of the instances directory.
*/
-struct dentry *tracefs_create_instance_dir(const char *name, struct dentry
*parent,
- int (*mkdir)(const char *name),
- int (*rmdir)(const char *name))
+struct dentry *
+tracefs_create_instance_dir(const char *name, struct dentry *parent,
+ int (*mkdir)(int instance_type, void *data),
+ int (*rmdir)(int instance_type, void *data))
{
struct dentry *dentry;
/* Only allow one instance of the instances directory. */
- if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir))
+ if (WARN_ON(tracefs_instance_ops.mkdir || tracefs_instance_ops.rmdir))
return NULL;
dentry = __create_dir(name, parent, &tracefs_dir_inode_operations);
if (!dentry)
return NULL;
- tracefs_ops.mkdir = mkdir;
- tracefs_ops.rmdir = rmdir;
+ tracefs_instance_ops.mkdir = mkdir;
+ tracefs_instance_ops.rmdir = rmdir;
return dentry;
}
diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h
index 5b727a1..30d4e55 100644
--- a/include/linux/tracefs.h
+++ b/include/linux/tracefs.h
@@ -25,6 +25,10 @@ struct file_operations;
#ifdef CONFIG_TRACING
+/* instance types */
+#define INSTANCE_DIR 0 /* created inside instances dir */
+#define INSTANCE_MNT 1 /* created with newinstance mount option */
+
struct dentry *tracefs_create_file(const char *name, umode_t mode,
struct dentry *parent, void *data,
const struct file_operations *fops);
@@ -34,9 +38,10 @@ struct dentry *tracefs_create_dir(const char *name, struct
dentry *parent);
void tracefs_remove(struct dentry *dentry);
void tracefs_remove_recursive(struct dentry *dentry);
-struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent,
- int (*mkdir)(const char *name),
- int (*rmdir)(const char *name));
+struct dentry *
+tracefs_create_instance_dir(const char *name, struct dentry *parent,
+ int (*mkdir)(int instance_type, void *data),
+ int (*rmdir)(int instance_type, void *data));
bool tracefs_initialized(void);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 23a8111..a991e9d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6782,17 +6782,24 @@ static void update_tracer_options(struct trace_array
*tr)
mutex_unlock(&trace_types_lock);
}
-static int instance_mkdir(const char *name)
+static int instance_mkdir(int instance_type, void *data)
{
+ const char *name = "tracing";
+ struct dentry *mnt_root = NULL;
struct trace_array *tr;
int ret;
mutex_lock(&trace_types_lock);
- ret = -EEXIST;
- list_for_each_entry(tr, &ftrace_trace_arrays, list) {
- if (tr->name && strcmp(tr->name, name) == 0)
- goto out_unlock;
+ if (instance_type == INSTANCE_MNT)
+ mnt_root = data;
+ else {
+ name = data;
+ ret = -EEXIST;
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ if (tr->name && strcmp(tr->name, name) == 0)
+ goto out_unlock;
+ }
}
ret = -ENOMEM;
@@ -6823,9 +6830,14 @@ static int instance_mkdir(const char *name)
if (allocate_trace_buffers(tr, trace_buf_size) < 0)
goto out_free_tr;
- tr->dir = tracefs_create_dir(name, trace_instance_dir);
- if (!tr->dir)
- goto out_free_tr;
+ if (instance_type == INSTANCE_MNT) {
+ mnt_root->d_inode->i_private = tr;
+ tr->dir = mnt_root;
+ } else {
+ tr->dir = tracefs_create_dir(name, trace_instance_dir);
+ if (!tr->dir)
+ goto out_free_tr;
+ }
ret = event_trace_add_tracer(tr->dir, tr);
if (ret) {
@@ -6856,8 +6868,10 @@ static int instance_mkdir(const char *name)
}
-static int instance_rmdir(const char *name)
+static int instance_rmdir(int instance_type, void *data)
{
+ const char *name = "tracing";
+ struct dentry *mnt_root = NULL;
struct trace_array *tr;
int found = 0;
int ret;
@@ -6865,15 +6879,21 @@ static int instance_rmdir(const char *name)
mutex_lock(&trace_types_lock);
- ret = -ENODEV;
- list_for_each_entry(tr, &ftrace_trace_arrays, list) {
- if (tr->name && strcmp(tr->name, name) == 0) {
- found = 1;
- break;
+ if (instance_type == INSTANCE_MNT) {
+ mnt_root = data;
+ tr = mnt_root->d_inode->i_private;
+ } else {
+ name = data;
+ ret = -ENODEV;
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ if (tr->name && strcmp(tr->name, name) == 0) {
+ found = 1;
+ break;
+ }
}
+ if (!found)
+ goto out_unlock;
}
- if (!found)
- goto out_unlock;
ret = -EBUSY;
if (tr->ref || (tr->current_trace && tr->current_trace->ref))