On Sat, 7 Oct 2023 19:44:45 +0900 Masami Hiramatsu (Google) <mhira...@kernel.org> wrote:
> [...] > > @@ -118,6 +72,7 @@ static struct dentry *create_file(const char *name, > > umode_t mode, > > if (WARN_ON_ONCE(!S_ISREG(mode))) > > return NULL; > > > > + WARN_ON_ONCE(!parent); > > Nit: This looks a wrong case and should not create a file under root > directory. > So it should return NULL. (but it shouldn't happen.) Yes it should never happen (hence the warn on), but if it does happen, it shouldn't cause any real harm, so I decided not to return early. > > > dentry = eventfs_start_creating(name, parent); > > > > if (IS_ERR(dentry)) > > > > +static struct dentry * > > +create_file_dentry(struct eventfs_inode *ei, struct dentry **e_dentry, > > + struct dentry *parent, const char *name, umode_t mode, void > > *data, > > + const struct file_operations *fops, bool lookup) > > +{ > > + struct dentry *dentry; > > + bool invalidate = false; > > + > > + mutex_lock(&eventfs_mutex); > > + /* If the e_dentry already has a dentry, use it */ > > + if (*e_dentry) { > > + /* lookup does not need to up the ref count */ > > + if (!lookup) > > + dget(*e_dentry); > > + mutex_unlock(&eventfs_mutex); > > + return *e_dentry; > > + } > > + mutex_unlock(&eventfs_mutex); > > + > > + /* The lookup already has the parent->d_inode locked */ > > + if (!lookup) > > + inode_lock(parent->d_inode); > > + > > + dentry = create_file(name, mode, parent, data, fops); > > + > > + if (!lookup) > > + inode_unlock(parent->d_inode); > > + > > + mutex_lock(&eventfs_mutex); > > + > > + if (IS_ERR_OR_NULL(dentry)) { > > + /* > > + * When the mutex was released, something else could have > > + * created the dentry for this e_dentry. In which case > > + * use that one. > > + * > > + * Note, with the mutex held, the e_dentry cannot have content > > + * and the ei->is_freed be true at the same time. > > + */ > > + WARN_ON_ONCE(ei->is_freed); > > + dentry = *e_dentry; > > + /* The lookup does not need to up the dentry refcount */ > > + if (dentry && !lookup) > > + dget(dentry); > > + mutex_unlock(&eventfs_mutex); > > + return dentry; > > + } > > + > > + if (!*e_dentry && !ei->is_freed) { > > + *e_dentry = dentry; > > + dentry->d_fsdata = ei; > > Nit: If we use LSB for existing flag, this should be checked. e.g. > WARN_ON_ONCE(ei & 1). But ei->is_freed is set before we set that LSB. Why should we check it here if we already checked ei->is_freed? > > > > + } else { > > + /* > > + * Should never happen unless we get here due to being freed. > > + * Otherwise it means two dentries exist with the same name. > > + */ > > + WARN_ON_ONCE(!ei->is_freed); > > + invalidate = true; > > + } > > + mutex_unlock(&eventfs_mutex); > > + > > + if (invalidate) > > + d_invalidate(dentry); > > + > > + if (lookup || invalidate) > > + dput(dentry); > > + > > + return invalidate ? NULL : dentry; > > +} > > + > > /** > > * eventfs_post_create_dir - post create dir routine > > - * @ef: eventfs_file of recently created dir > > + * @ei: eventfs_inode of recently created dir > > * > > * Map the meta-data of files within an eventfs dir to their parent dentry > > */ > > -static void eventfs_post_create_dir(struct eventfs_file *ef) > > +static void eventfs_post_create_dir(struct eventfs_inode *ei) > > { > > - struct eventfs_file *ef_child; > > + struct eventfs_inode *ei_child; > > struct tracefs_inode *ti; > > > > /* srcu lock already held */ > > /* fill parent-child relation */ > > - list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list, > > + list_for_each_entry_srcu(ei_child, &ei->children, list, > > srcu_read_lock_held(&eventfs_srcu)) { > > - ef_child->d_parent = ef->dentry; > > + ei_child->d_parent = ei->dentry; > > } > > > > - ti = get_tracefs(ef->dentry->d_inode); > > - ti->private = ef->ei; > > + ti = get_tracefs(ei->dentry->d_inode); > > + ti->private = ei; > > } > > > > /** > > - * create_dentry - helper function to create dentry > > - * @ef: eventfs_file of file or directory to create > > - * @parent: parent dentry > > - * @lookup: true if called from lookup routine > > + * create_dir_dentry - Create a directory dentry for the eventfs_inode > > + * @ei: The eventfs_inode to create the directory for > > + * @parent: The dentry of the parent of this directory > > + * @lookup: True if this is called by the lookup code > > * > > - * Used to create a dentry for file/dir, executes post dentry creation > > routine > > + * This creates and attaches a directory dentry to the eventfs_inode @ei. > > */ > > static struct dentry * > > -create_dentry(struct eventfs_file *ef, struct dentry *parent, bool lookup) > > +create_dir_dentry(struct eventfs_inode *ei, struct dentry *parent, bool > > lookup) > > { > > bool invalidate = false; > > - struct dentry *dentry; > > + struct dentry *dentry = NULL; > > > > mutex_lock(&eventfs_mutex); > > - if (ef->is_freed) { > > - mutex_unlock(&eventfs_mutex); > > - return NULL; > > - } > > - if (ef->dentry) { > > - dentry = ef->dentry; > > - /* On dir open, up the ref count */ > > + if (ei->dentry) { > > + /* If the dentry already has a dentry, use it */ > > + dentry = ei->dentry; > > + /* lookup does not need to up the ref count */ > > if (!lookup) > > dget(dentry); > > mutex_unlock(&eventfs_mutex); > > @@ -302,42 +343,44 @@ create_dentry(struct eventfs_file *ef, struct dentry > > *parent, bool lookup) > > } > > mutex_unlock(&eventfs_mutex); > > > > + /* The lookup already has the parent->d_inode locked */ > > if (!lookup) > > inode_lock(parent->d_inode); > > > > - if (ef->ei) > > - dentry = create_dir(ef->name, parent, ef->data); > > - else > > - dentry = create_file(ef->name, ef->mode, parent, > > - ef->data, ef->fop); > > + dentry = create_dir(ei->name, parent); > > > > if (!lookup) > > inode_unlock(parent->d_inode); > > > > mutex_lock(&eventfs_mutex); > > - if (IS_ERR_OR_NULL(dentry)) { > > - /* If the ef was already updated get it */ > > - dentry = ef->dentry; > > + > > + if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) { > > + /* > > + * When the mutex was released, something else could have > > + * created the dentry for this e_dentry. In which case > > + * use that one. > > + * > > + * Note, with the mutex held, the e_dentry cannot have content > > + * and the ei->is_freed be true at the same time. > > + */ > > + dentry = ei->dentry; > > if (dentry && !lookup) > > dget(dentry); > > mutex_unlock(&eventfs_mutex); > > return dentry; > > } > > > > - if (!ef->dentry && !ef->is_freed) { > > - ef->dentry = dentry; > > - if (ef->ei) > > - eventfs_post_create_dir(ef); > > - dentry->d_fsdata = ef; > > + if (!ei->dentry && !ei->is_freed) { > > + ei->dentry = dentry; > > + eventfs_post_create_dir(ei); > > + dentry->d_fsdata = ei; > > Ditto. And again, the LSB is set after ei->is_freed is set. > > > } else { > > - /* A race here, should try again (unless freed) */ > > - invalidate = true; > > - > > /* > > * Should never happen unless we get here due to being freed. > > * Otherwise it means two dentries exist with the same name. > > */ > > - WARN_ON_ONCE(!ef->is_freed); > > + WARN_ON_ONCE(!ei->is_freed); > > + invalidate = true; > > } > > mutex_unlock(&eventfs_mutex); > > if (invalidate) > > @@ -349,50 +392,82 @@ create_dentry(struct eventfs_file *ef, struct dentry > > *parent, bool lookup) > > return invalidate ? NULL : dentry; > > } > > > > -static bool match_event_file(struct eventfs_file *ef, const char *name) > > -{ > > - bool ret; > > - > > - mutex_lock(&eventfs_mutex); > > - ret = !ef->is_freed && strcmp(ef->name, name) == 0; > > - mutex_unlock(&eventfs_mutex); > > - > > - return ret; > > -} > > - > > /** > > * eventfs_root_lookup - lookup routine to create file/dir > > * @dir: in which a lookup is being done > > * @dentry: file/dir dentry > > - * @flags: to pass as flags parameter to simple lookup > > + * @flags: Just passed to simple_lookup() > > * > > - * Used to create a dynamic file/dir within @dir. Use the eventfs_inode > > - * list of meta data to find the information needed to create the file/dir. > > + * Used to create dynamic file/dir with-in @dir, search with-in @ei > > + * list, if @dentry found go ahead and create the file/dir > > */ > > + > > static struct dentry *eventfs_root_lookup(struct inode *dir, > > struct dentry *dentry, > > unsigned int flags) > > { > > + const struct file_operations *fops; > > + const struct eventfs_entry *entry; > > + struct eventfs_inode *ei_child; > > struct tracefs_inode *ti; > > struct eventfs_inode *ei; > > - struct eventfs_file *ef; > > struct dentry *ret = NULL; > > + const char *name = dentry->d_name.name; > > + bool created = false; > > + umode_t mode; > > + void *data; > > int idx; > > + int i; > > + int r; > > > > ti = get_tracefs(dir); > > if (!(ti->flags & TRACEFS_EVENT_INODE)) > > return NULL; > > > > - ei = ti->private; > > + /* Grab srcu to prevent the ei from going away */ > > idx = srcu_read_lock(&eventfs_srcu); > > - list_for_each_entry_srcu(ef, &ei->e_top_files, list, > > + > > + /* > > + * Grab the eventfs_mutex to consistent value from ti->private. > > + * This s > > + */ > > + mutex_lock(&eventfs_mutex); > > + ei = READ_ONCE(ti->private); > > + mutex_unlock(&eventfs_mutex); > > + > > + if (!ei) > > + goto out; > > + > > + data = ei->data; > > + > > + list_for_each_entry_srcu(ei_child, &ei->children, list, > > srcu_read_lock_held(&eventfs_srcu)) { > > - if (!match_event_file(ef, dentry->d_name.name)) > > + if (strcmp(ei_child->name, name) != 0) > > continue; > > ret = simple_lookup(dir, dentry, flags); > > Don't we check this return value? It's the return code from this function. It shouldn't affect the next lines. > > > - create_dentry(ef, ef->d_parent, true); > > + create_dir_dentry(ei_child, ei->dentry, true); > > + created = true; > > break; > > } > > + > > + if (created) > > + goto out; > > + > > + for (i = 0; i < ei->nr_entries; i++) { > > + entry = &ei->entries[i]; > > + if (strcmp(name, entry->name) == 0) { > > + void *cdata = data; > > + r = entry->callback(name, &mode, &cdata, &fops); > > + if (r <= 0) > > + continue; > > + ret = simple_lookup(dir, dentry, flags); > > Ditto. The same as above. It's just the return code of the function. > > > + create_file_dentry(ei, &ei->d_children[i], > > + ei->dentry, name, mode, cdata, > > + fops, true); > > + break; > > + } > > + } > > + out: > > srcu_read_unlock(&eventfs_srcu, idx); > > return ret; > > } > [...] > > @@ -2400,15 +2416,134 @@ event_define_fields(struct trace_event_call *call) > > return ret; > > } > > > > +static int event_callback(const char *name, umode_t *mode, void **data, > > + const struct file_operations **fops) > > +{ > > + struct trace_event_file *file = *data; > > + struct trace_event_call *call = file->event_call; > > + > > + if (strcmp(name, "format") == 0) { > > + *mode = TRACE_MODE_READ; > > + *fops = &ftrace_event_format_fops; > > + *data = call; > > + return 1; > > + } > > + > > + /* > > + * Only event directories that can be enabled should have > > + * triggers or filters, with the exception of the "print" > > + * event that can have a "trigger" file. > > + */ > > + if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) { > > + if (call->class->reg && strcmp(name, "enable") == 0) { > > + *mode = TRACE_MODE_WRITE; > > + *fops = &ftrace_enable_fops; > > + return 1; > > + } > > + > > + if (strcmp(name, "filter") == 0) { > > + *mode = TRACE_MODE_WRITE; > > + *fops = &ftrace_event_filter_fops; > > + return 1; > > + } > > + } > > + > > + if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || > > + strcmp(trace_event_name(call), "print") == 0) { > > + if (strcmp(name, "trigger") == 0) { > > + *mode = TRACE_MODE_WRITE; > > + *fops = &event_trigger_fops; > > + return 1; > > + } > > + } > > + > > +#ifdef CONFIG_PERF_EVENTS > > + if (call->event.type && call->class->reg && > > + strcmp(name, "id") == 0) { > > + *mode = TRACE_MODE_READ; > > + *data = (void *)(long)call->event.type; > > + *fops = &ftrace_event_id_fops; > > + return 1; > > + } > > +#endif > > + > > +#ifdef CONFIG_HIST_TRIGGERS > > + if (strcmp(name, "hist") == 0) { > > + *mode = TRACE_MODE_READ; > > + *fops = &event_hist_fops; > > + return 1; > > + } > > +#endif > > +#ifdef CONFIG_HIST_TRIGGERS_DEBUG > > + if (strcmp(name, "hist_debug") == 0) { > > + *mode = TRACE_MODE_READ; > > + *fops = &event_hist_debug_fops; > > + return 1; > > + } > > +#endif > > +#ifdef CONFIG_TRACE_EVENT_INJECT > > + if (call->event.type && call->class->reg && > > + strcmp(name, "inject") == 0) { > > + *mode = 0200; > > + *fops = &event_inject_fops; > > + return 1; > > + } > > +#endif > > + return 0; > > +} > > + > > static int > > -event_create_dir(struct dentry *parent, struct trace_event_file *file) > > +event_create_dir(struct eventfs_inode *parent, struct trace_event_file > > *file) > > { > > struct trace_event_call *call = file->event_call; > > - struct eventfs_file *ef_subsystem = NULL; > > struct trace_array *tr = file->tr; > > - struct eventfs_file *ef; > > + struct eventfs_inode *e_events; > > + struct eventfs_inode *ei; > > const char *name; > > + int nr_entries; > > int ret; > > + static struct eventfs_entry event_entries[] = { > > + { > > + .name = "enable", > > + .callback = event_callback, > > + }, > > + { > > + .name = "filter", > > + .callback = event_callback, > > + }, > > + { > > + .name = "trigger", > > + .callback = event_callback, > > + }, > > + { > > + .name = "format", > > + .callback = event_callback, > > + }, > > +#ifdef CONFIG_PERF_EVENTS > > + { > > + .name = "id", > > + .callback = event_callback, > > + }, > > +#endif > > +#ifdef CONFIG_HIST_TRIGGERS > > + { > > + .name = "hist", > > + .callback = event_callback, > > + }, > > +#endif > > +#ifdef CONFIG_HIST_TRIGGERS_DEBUG > > + { > > + .name = "hist_debug", > > + .callback = event_callback, > > + }, > > +#endif > > +#ifdef CONFIG_TRACE_EVENT_INJECT > > + { > > + .name = "inject", > > + .callback = event_callback, > > + }, > > +#endif > > Q: I wonder why these uses the same 'event_callback'? it seems those have > different callbacks... It really is just a preference. I could have it use a different callback, but if you look at the event_callback(), it looks at the name passed in to determine what to do. So yes, they do different things when the name passed in is different. > [...] > > > +static int events_callback(const char *name, umode_t *mode, void **data, > > + const struct file_operations **fops) > > +{ > > + if (strcmp(name, "enable") == 0) { > > + *mode = TRACE_MODE_WRITE; > > + *fops = &ftrace_tr_enable_fops; > > + return 1; > > + } > > + > > + if (strcmp(name, "header_page") == 0) > > + *data = ring_buffer_print_page_header; > > + > > + else if (strcmp(name, "header_event") == 0) > > + *data = ring_buffer_print_entry_header; > > + > > + else > > + return 0; > > + > > + *mode = TRACE_MODE_READ; > > + *fops = &ftrace_show_header_fops; > > + return 1; > > +} > > + > > /* Expects to have event_mutex held when called */ > > static int > > create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) > > { > > - struct dentry *d_events; > > + struct eventfs_inode *e_events; > > struct dentry *entry; > > - int error = 0; > > + int nr_entries; > > + static struct eventfs_entry events_entries[] = { > > + { > > + .name = "enable", > > + .callback = events_callback, > > + }, > > + { > > + .name = "header_page", > > + .callback = events_callback, > > + }, > > + { > > + .name = "header_event", > > + .callback = events_callback, > > + }, > > + }; > > Here too. Same reason. > > Thank you, > Thanks for reviewing. -- Steve