On Sat, 7 Oct 2023 19:44:45 +0900
Masami Hiramatsu (Google) <mhira...@kernel.org> wrote:

> [...]
> > @@ -118,6 +72,7 @@ static struct dentry *create_file(const char *name, 
> > umode_t mode,
> >     if (WARN_ON_ONCE(!S_ISREG(mode)))
> >             return NULL;
> >  
> > +   WARN_ON_ONCE(!parent);  
> 
> Nit: This looks a wrong case and should not create a file under root 
> directory.
>      So it should return NULL. (but it shouldn't happen.)

Yes it should never happen (hence the warn on), but if it does happen, it
shouldn't cause any real harm, so I decided not to return early.

> 
> >     dentry = eventfs_start_creating(name, parent);
> >  
> >     if (IS_ERR(dentry))  
> 
>


> > +static struct dentry *
> > +create_file_dentry(struct eventfs_inode *ei, struct dentry **e_dentry,
> > +              struct dentry *parent, const char *name, umode_t mode, void 
> > *data,
> > +              const struct file_operations *fops, bool lookup)
> > +{
> > +   struct dentry *dentry;
> > +   bool invalidate = false;
> > +
> > +   mutex_lock(&eventfs_mutex);
> > +   /* If the e_dentry already has a dentry, use it */
> > +   if (*e_dentry) {
> > +           /* lookup does not need to up the ref count */
> > +           if (!lookup)
> > +                   dget(*e_dentry);
> > +           mutex_unlock(&eventfs_mutex);
> > +           return *e_dentry;
> > +   }
> > +   mutex_unlock(&eventfs_mutex);
> > +
> > +   /* The lookup already has the parent->d_inode locked */
> > +   if (!lookup)
> > +           inode_lock(parent->d_inode);
> > +
> > +   dentry = create_file(name, mode, parent, data, fops);
> > +
> > +   if (!lookup)
> > +           inode_unlock(parent->d_inode);
> > +
> > +   mutex_lock(&eventfs_mutex);
> > +
> > +   if (IS_ERR_OR_NULL(dentry)) {
> > +           /*
> > +            * When the mutex was released, something else could have
> > +            * created the dentry for this e_dentry. In which case
> > +            * use that one.
> > +            *
> > +            * Note, with the mutex held, the e_dentry cannot have content
> > +            * and the ei->is_freed be true at the same time.
> > +            */
> > +           WARN_ON_ONCE(ei->is_freed);
> > +           dentry = *e_dentry;
> > +           /* The lookup does not need to up the dentry refcount */
> > +           if (dentry && !lookup)
> > +                   dget(dentry);
> > +           mutex_unlock(&eventfs_mutex);
> > +           return dentry;
> > +   }
> > +
> > +   if (!*e_dentry && !ei->is_freed) {
> > +           *e_dentry = dentry;
> > +           dentry->d_fsdata = ei;  
> 
> Nit: If we use LSB for existing flag, this should be checked. e.g. 
> WARN_ON_ONCE(ei & 1).

But ei->is_freed is set before we set that LSB. Why should we check it here
if we already checked ei->is_freed?

> 
> 
> > +   } else {
> > +           /*
> > +            * Should never happen unless we get here due to being freed.
> > +            * Otherwise it means two dentries exist with the same name.
> > +            */
> > +           WARN_ON_ONCE(!ei->is_freed);
> > +           invalidate = true;
> > +   }
> > +   mutex_unlock(&eventfs_mutex);
> > +
> > +   if (invalidate)
> > +           d_invalidate(dentry);
> > +
> > +   if (lookup || invalidate)
> > +           dput(dentry);
> > +
> > +   return invalidate ? NULL : dentry;
> > +}
> > +
> >  /**
> >   * eventfs_post_create_dir - post create dir routine
> > - * @ef: eventfs_file of recently created dir
> > + * @ei: eventfs_inode of recently created dir
> >   *
> >   * Map the meta-data of files within an eventfs dir to their parent dentry
> >   */
> > -static void eventfs_post_create_dir(struct eventfs_file *ef)
> > +static void eventfs_post_create_dir(struct eventfs_inode *ei)
> >  {
> > -   struct eventfs_file *ef_child;
> > +   struct eventfs_inode *ei_child;
> >     struct tracefs_inode *ti;
> >  
> >     /* srcu lock already held */
> >     /* fill parent-child relation */
> > -   list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list,
> > +   list_for_each_entry_srcu(ei_child, &ei->children, list,
> >                              srcu_read_lock_held(&eventfs_srcu)) {
> > -           ef_child->d_parent = ef->dentry;
> > +           ei_child->d_parent = ei->dentry;
> >     }
> >  
> > -   ti = get_tracefs(ef->dentry->d_inode);
> > -   ti->private = ef->ei;
> > +   ti = get_tracefs(ei->dentry->d_inode);
> > +   ti->private = ei;
> >  }
> >  
> >  /**
> > - * create_dentry - helper function to create dentry
> > - * @ef: eventfs_file of file or directory to create
> > - * @parent: parent dentry
> > - * @lookup: true if called from lookup routine
> > + * create_dir_dentry - Create a directory dentry for the eventfs_inode
> > + * @ei: The eventfs_inode to create the directory for
> > + * @parent: The dentry of the parent of this directory
> > + * @lookup: True if this is called by the lookup code
> >   *
> > - * Used to create a dentry for file/dir, executes post dentry creation 
> > routine
> > + * This creates and attaches a directory dentry to the eventfs_inode @ei.
> >   */
> >  static struct dentry *
> > -create_dentry(struct eventfs_file *ef, struct dentry *parent, bool lookup)
> > +create_dir_dentry(struct eventfs_inode *ei, struct dentry *parent, bool 
> > lookup)
> >  {
> >     bool invalidate = false;
> > -   struct dentry *dentry;
> > +   struct dentry *dentry = NULL;
> >  
> >     mutex_lock(&eventfs_mutex);
> > -   if (ef->is_freed) {
> > -           mutex_unlock(&eventfs_mutex);
> > -           return NULL;
> > -   }
> > -   if (ef->dentry) {
> > -           dentry = ef->dentry;
> > -           /* On dir open, up the ref count */
> > +   if (ei->dentry) {
> > +           /* If the dentry already has a dentry, use it */
> > +           dentry = ei->dentry;
> > +           /* lookup does not need to up the ref count */
> >             if (!lookup)
> >                     dget(dentry);
> >             mutex_unlock(&eventfs_mutex);
> > @@ -302,42 +343,44 @@ create_dentry(struct eventfs_file *ef, struct dentry 
> > *parent, bool lookup)
> >     }
> >     mutex_unlock(&eventfs_mutex);
> >  
> > +   /* The lookup already has the parent->d_inode locked */
> >     if (!lookup)
> >             inode_lock(parent->d_inode);
> >  
> > -   if (ef->ei)
> > -           dentry = create_dir(ef->name, parent, ef->data);
> > -   else
> > -           dentry = create_file(ef->name, ef->mode, parent,
> > -                                ef->data, ef->fop);
> > +   dentry = create_dir(ei->name, parent);
> >  
> >     if (!lookup)
> >             inode_unlock(parent->d_inode);
> >  
> >     mutex_lock(&eventfs_mutex);
> > -   if (IS_ERR_OR_NULL(dentry)) {
> > -           /* If the ef was already updated get it */
> > -           dentry = ef->dentry;
> > +
> > +   if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
> > +           /*
> > +            * When the mutex was released, something else could have
> > +            * created the dentry for this e_dentry. In which case
> > +            * use that one.
> > +            *
> > +            * Note, with the mutex held, the e_dentry cannot have content
> > +            * and the ei->is_freed be true at the same time.
> > +            */
> > +           dentry = ei->dentry;
> >             if (dentry && !lookup)
> >                     dget(dentry);
> >             mutex_unlock(&eventfs_mutex);
> >             return dentry;
> >     }
> >  
> > -   if (!ef->dentry && !ef->is_freed) {
> > -           ef->dentry = dentry;
> > -           if (ef->ei)
> > -                   eventfs_post_create_dir(ef);
> > -           dentry->d_fsdata = ef;
> > +   if (!ei->dentry && !ei->is_freed) {
> > +           ei->dentry = dentry;
> > +           eventfs_post_create_dir(ei);
> > +           dentry->d_fsdata = ei;  
> 
> Ditto.

And again, the LSB is set after ei->is_freed is set.

> 
> >     } else {
> > -           /* A race here, should try again (unless freed) */
> > -           invalidate = true;
> > -
> >             /*
> >              * Should never happen unless we get here due to being freed.
> >              * Otherwise it means two dentries exist with the same name.
> >              */
> > -           WARN_ON_ONCE(!ef->is_freed);
> > +           WARN_ON_ONCE(!ei->is_freed);
> > +           invalidate = true;
> >     }
> >     mutex_unlock(&eventfs_mutex);
> >     if (invalidate)
> > @@ -349,50 +392,82 @@ create_dentry(struct eventfs_file *ef, struct dentry 
> > *parent, bool lookup)
> >     return invalidate ? NULL : dentry;
> >  }
> >  
> > -static bool match_event_file(struct eventfs_file *ef, const char *name)
> > -{
> > -   bool ret;
> > -
> > -   mutex_lock(&eventfs_mutex);
> > -   ret = !ef->is_freed && strcmp(ef->name, name) == 0;
> > -   mutex_unlock(&eventfs_mutex);
> > -
> > -   return ret;
> > -}
> > -
> >  /**
> >   * eventfs_root_lookup - lookup routine to create file/dir
> >   * @dir: in which a lookup is being done
> >   * @dentry: file/dir dentry
> > - * @flags: to pass as flags parameter to simple lookup
> > + * @flags: Just passed to simple_lookup()
> >   *
> > - * Used to create a dynamic file/dir within @dir. Use the eventfs_inode
> > - * list of meta data to find the information needed to create the file/dir.
> > + * Used to create dynamic file/dir with-in @dir, search with-in @ei
> > + * list, if @dentry found go ahead and create the file/dir
> >   */
> > +
> >  static struct dentry *eventfs_root_lookup(struct inode *dir,
> >                                       struct dentry *dentry,
> >                                       unsigned int flags)
> >  {
> > +   const struct file_operations *fops;
> > +   const struct eventfs_entry *entry;
> > +   struct eventfs_inode *ei_child;
> >     struct tracefs_inode *ti;
> >     struct eventfs_inode *ei;
> > -   struct eventfs_file *ef;
> >     struct dentry *ret = NULL;
> > +   const char *name = dentry->d_name.name;
> > +   bool created = false;
> > +   umode_t mode;
> > +   void *data;
> >     int idx;
> > +   int i;
> > +   int r;
> >  
> >     ti = get_tracefs(dir);
> >     if (!(ti->flags & TRACEFS_EVENT_INODE))
> >             return NULL;
> >  
> > -   ei = ti->private;
> > +   /* Grab srcu to prevent the ei from going away */
> >     idx = srcu_read_lock(&eventfs_srcu);
> > -   list_for_each_entry_srcu(ef, &ei->e_top_files, list,
> > +
> > +   /*
> > +    * Grab the eventfs_mutex to consistent value from ti->private.
> > +    * This s
> > +    */
> > +   mutex_lock(&eventfs_mutex);
> > +   ei = READ_ONCE(ti->private);
> > +   mutex_unlock(&eventfs_mutex);
> > +
> > +   if (!ei)
> > +           goto out;
> > +
> > +   data = ei->data;
> > +
> > +   list_for_each_entry_srcu(ei_child, &ei->children, list,
> >                              srcu_read_lock_held(&eventfs_srcu)) {
> > -           if (!match_event_file(ef, dentry->d_name.name))
> > +           if (strcmp(ei_child->name, name) != 0)
> >                     continue;
> >             ret = simple_lookup(dir, dentry, flags);  
> 
> Don't we check this return value?

It's the return code from this function.

It shouldn't affect the next lines.

> 
> > -           create_dentry(ef, ef->d_parent, true);
> > +           create_dir_dentry(ei_child, ei->dentry, true);
> > +           created = true;
> >             break;
> >     }
> > +
> > +   if (created)
> > +           goto out;
> > +
> > +   for (i = 0; i < ei->nr_entries; i++) {
> > +           entry = &ei->entries[i];
> > +           if (strcmp(name, entry->name) == 0) {
> > +                   void *cdata = data;
> > +                   r = entry->callback(name, &mode, &cdata, &fops);
> > +                   if (r <= 0)
> > +                           continue;
> > +                   ret = simple_lookup(dir, dentry, flags);  
> 
> Ditto.

The same as above. It's just the return code of the function.

> 
> > +                   create_file_dentry(ei, &ei->d_children[i],
> > +                                      ei->dentry, name, mode, cdata,
> > +                                      fops, true);
> > +                   break;
> > +           }
> > +   }
> > + out:
> >     srcu_read_unlock(&eventfs_srcu, idx);
> >     return ret;
> >  }  
> [...]
> > @@ -2400,15 +2416,134 @@ event_define_fields(struct trace_event_call *call)
> >     return ret;
> >  }
> >  
> > +static int event_callback(const char *name, umode_t *mode, void **data,
> > +                     const struct file_operations **fops)
> > +{
> > +   struct trace_event_file *file = *data;
> > +   struct trace_event_call *call = file->event_call;
> > +
> > +   if (strcmp(name, "format") == 0) {
> > +           *mode = TRACE_MODE_READ;
> > +           *fops = &ftrace_event_format_fops;
> > +           *data = call;
> > +           return 1;
> > +   }
> > +
> > +   /*
> > +    * Only event directories that can be enabled should have
> > +    * triggers or filters, with the exception of the "print"
> > +    * event that can have a "trigger" file.
> > +    */
> > +   if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
> > +           if (call->class->reg && strcmp(name, "enable") == 0) {
> > +                   *mode = TRACE_MODE_WRITE;
> > +                   *fops = &ftrace_enable_fops;
> > +                   return 1;
> > +           }
> > +
> > +           if (strcmp(name, "filter") == 0) {
> > +                   *mode = TRACE_MODE_WRITE;
> > +                   *fops = &ftrace_event_filter_fops;
> > +                   return 1;
> > +           }
> > +   }
> > +
> > +   if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
> > +       strcmp(trace_event_name(call), "print") == 0) {
> > +           if (strcmp(name, "trigger") == 0) {
> > +                   *mode = TRACE_MODE_WRITE;
> > +                   *fops = &event_trigger_fops;
> > +                   return 1;
> > +           }
> > +   }
> > +
> > +#ifdef CONFIG_PERF_EVENTS
> > +   if (call->event.type && call->class->reg &&
> > +       strcmp(name, "id") == 0) {
> > +           *mode = TRACE_MODE_READ;
> > +           *data = (void *)(long)call->event.type;
> > +           *fops = &ftrace_event_id_fops;
> > +           return 1;
> > +   }
> > +#endif
> > +
> > +#ifdef CONFIG_HIST_TRIGGERS
> > +   if (strcmp(name, "hist") == 0) {
> > +           *mode = TRACE_MODE_READ;
> > +           *fops = &event_hist_fops;
> > +           return 1;
> > +   }
> > +#endif
> > +#ifdef CONFIG_HIST_TRIGGERS_DEBUG
> > +   if (strcmp(name, "hist_debug") == 0) {
> > +           *mode = TRACE_MODE_READ;
> > +           *fops = &event_hist_debug_fops;
> > +           return 1;
> > +   }
> > +#endif
> > +#ifdef CONFIG_TRACE_EVENT_INJECT
> > +   if (call->event.type && call->class->reg &&
> > +       strcmp(name, "inject") == 0) {
> > +           *mode = 0200;
> > +           *fops = &event_inject_fops;
> > +           return 1;
> > +   }
> > +#endif
> > +   return 0;
> > +}
> > +
> >  static int
> > -event_create_dir(struct dentry *parent, struct trace_event_file *file)
> > +event_create_dir(struct eventfs_inode *parent, struct trace_event_file 
> > *file)
> >  {
> >     struct trace_event_call *call = file->event_call;
> > -   struct eventfs_file *ef_subsystem = NULL;
> >     struct trace_array *tr = file->tr;
> > -   struct eventfs_file *ef;
> > +   struct eventfs_inode *e_events;
> > +   struct eventfs_inode *ei;
> >     const char *name;
> > +   int nr_entries;
> >     int ret;
> > +   static struct eventfs_entry event_entries[] = {
> > +           {
> > +                   .name           = "enable",
> > +                   .callback       = event_callback,
> > +           },
> > +           {
> > +                   .name           = "filter",
> > +                   .callback       = event_callback,
> > +           },
> > +           {
> > +                   .name           = "trigger",
> > +                   .callback       = event_callback,
> > +           },
> > +           {
> > +                   .name           = "format",
> > +                   .callback       = event_callback,
> > +           },
> > +#ifdef CONFIG_PERF_EVENTS
> > +           {
> > +                   .name           = "id",
> > +                   .callback       = event_callback,
> > +           },
> > +#endif
> > +#ifdef CONFIG_HIST_TRIGGERS
> > +           {
> > +                   .name           = "hist",
> > +                   .callback       = event_callback,
> > +           },
> > +#endif
> > +#ifdef CONFIG_HIST_TRIGGERS_DEBUG
> > +           {
> > +                   .name           = "hist_debug",
> > +                   .callback       = event_callback,
> > +           },
> > +#endif
> > +#ifdef CONFIG_TRACE_EVENT_INJECT
> > +           {
> > +                   .name           = "inject",
> > +                   .callback       = event_callback,
> > +           },
> > +#endif  
> 
> Q: I wonder why these uses the same 'event_callback'? it seems those have
> different callbacks...

It really is just a preference. I could have it use a different callback,
but if you look at the event_callback(), it looks at the name passed in to
determine what to do. So yes, they do different things when the name passed
in is different.


> [...]
> 
> > +static int events_callback(const char *name, umode_t *mode, void **data,
> > +                      const struct file_operations **fops)
> > +{
> > +   if (strcmp(name, "enable") == 0) {
> > +           *mode = TRACE_MODE_WRITE;
> > +           *fops = &ftrace_tr_enable_fops;
> > +           return 1;
> > +   }
> > +
> > +   if (strcmp(name, "header_page") == 0)
> > +           *data = ring_buffer_print_page_header;
> > +
> > +   else if (strcmp(name, "header_event") == 0)
> > +           *data = ring_buffer_print_entry_header;
> > +
> > +   else
> > +           return 0;
> > +
> > +   *mode = TRACE_MODE_READ;
> > +   *fops = &ftrace_show_header_fops;
> > +   return 1;
> > +}
> > +
> >  /* Expects to have event_mutex held when called */
> >  static int
> >  create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
> >  {
> > -   struct dentry *d_events;
> > +   struct eventfs_inode *e_events;
> >     struct dentry *entry;
> > -   int error = 0;
> > +   int nr_entries;
> > +   static struct eventfs_entry events_entries[] = {
> > +           {
> > +                   .name           = "enable",
> > +                   .callback       = events_callback,
> > +           },
> > +           {
> > +                   .name           = "header_page",
> > +                   .callback       = events_callback,
> > +           },
> > +           {
> > +                   .name           = "header_event",
> > +                   .callback       = events_callback,
> > +           },
> > +   };  
> 
> Here too.

Same reason.

> 
> Thank you,
> 

Thanks for reviewing.

-- Steve


Reply via email to