Linus,

This is mostly cleanups and small fixes. Some of the more visible
changes are:

 . The function pid code uses the event pid filtering logic
 . [ku]probe events have access to current->comm
 . trace_printk now has sample code
 . PCI devices now trace physical addresses
 . stack tracing has fewer unnecessary functions traced


Please pull the latest trace-v4.8 tree, which can be found at:


  git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git
trace-v4.8

Tag SHA1: 50b6c56120fd1bf557fd8771db87c5baf15d57e2
Head SHA1: 78aebca2c955c1c5aeb48e12645e13fe3c3461f2


Andy Lutomirski (1):
      tracing: Choose static tp_printk buffer by explicit nesting count

Bjorn Helgaas (1):
      tracing: Expose CPU physical addresses (resource values) for PCI devices

Daniel Bristot de Oliveira (4):
      tracing: Use outer () on __get_str() definition
      tracing, RAS: Cleanup on __get_str() usage
      tracing: Use __get_str() when manipulating strings
      printk, tracing: Avoiding unneeded blank lines

Joel Fernandes (1):
      tracing/function_graph: Fix filters for function_graph threshold

Namhyung Kim (1):
      ftrace: Reduce size of function graph entries

Omar Sandoval (1):
      tracing: expose current->comm to [ku]probe events

Steven Rostedt (2):
      tracing: Make the pid filtering helper functions global
      tracing: Move filtered_pid helper functions into trace.c

Steven Rostedt (Red Hat) (7):
      tracing: Move the pid_list seq_file functions to be global
      tracing: Move pid_list write processing into its own function
      ftrace: Have set_ftrace_pid use the bitmap like events do
      tracing: Add trace_printk sample code
      tracing: Show the preempt count of when the event was called
      tracing: Skip more functions when doing stack tracing of events
      ftrace: Move toplevel init out of ftrace_init_tracefs()

Tom Zanussi (1):
      tracing: Have HIST_TRIGGERS select TRACING

Wei Yongjun (1):
      tracing: Using for_each_set_bit() to simplify trace_pid_write()

----
 Documentation/trace/kprobetrace.txt  |   3 +
 Documentation/trace/uprobetracer.txt |   3 +
 fs/nfs/nfs4trace.h                   |   4 +-
 fs/nfs/nfstrace.h                    |   4 +-
 include/linux/ftrace.h               |  12 +-
 include/ras/ras_event.h              |   4 +-
 include/trace/events/printk.h        |  12 +-
 include/trace/perf.h                 |   2 +-
 include/trace/trace_events.h         |   2 +-
 kernel/trace/Kconfig                 |   1 +
 kernel/trace/ftrace.c                | 313 ++++++++++++++----------------
 kernel/trace/trace.c                 | 358 +++++++++++++++++++++++++++++------
 kernel/trace/trace.h                 |  48 ++++-
 kernel/trace/trace_entries.h         |   4 +-
 kernel/trace/trace_events.c          | 219 +++------------------
 kernel/trace/trace_functions.c       |   2 +-
 kernel/trace/trace_functions_graph.c |  19 +-
 kernel/trace/trace_kprobe.c          |   1 +
 kernel/trace/trace_mmiotrace.c       |  10 +-
 kernel/trace/trace_probe.c           |  33 ++++
 kernel/trace/trace_probe.h           |  10 +
 samples/Kconfig                      |   7 +
 samples/Makefile                     |   2 +-
 samples/trace_printk/Makefile        |   6 +
 samples/trace_printk/trace-printk.c  |  56 ++++++
 25 files changed, 667 insertions(+), 468 deletions(-)
 create mode 100644 samples/trace_printk/Makefile
 create mode 100644 samples/trace_printk/trace-printk.c
---------------------------
diff --git a/Documentation/trace/kprobetrace.txt 
b/Documentation/trace/kprobetrace.txt
index d68ea5fc812b..ea52ec1f8484 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -40,6 +40,7 @@ Synopsis of kprobe_events
   $stackN      : Fetch Nth entry of stack (N >= 0)
   $stack       : Fetch stack address.
   $retval      : Fetch return value.(*)
+  $comm                : Fetch current task comm.
   +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
   NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
   FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
@@ -62,6 +63,8 @@ offset, and container-size (usually 32). The syntax is;
 
  b<bit-width>@<bit-offset>/<container-size>
 
+For $comm, the default type is "string"; any other type is invalid.
+
 
 Per-Probe Event Filtering
 -------------------------
diff --git a/Documentation/trace/uprobetracer.txt 
b/Documentation/trace/uprobetracer.txt
index f1cf9a34ad9d..72d1cd4f7bf3 100644
--- a/Documentation/trace/uprobetracer.txt
+++ b/Documentation/trace/uprobetracer.txt
@@ -36,6 +36,7 @@ Synopsis of uprobe_tracer
    $stackN     : Fetch Nth entry of stack (N >= 0)
    $stack      : Fetch stack address.
    $retval     : Fetch return value.(*)
+   $comm       : Fetch current task comm.
    +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
    NAME=FETCHARG     : Set NAME as the argument name of FETCHARG.
    FETCHARG:TYPE     : Set TYPE as the type of FETCHARG. Currently, basic types
@@ -57,6 +58,8 @@ offset, and container-size (usually 32). The syntax is;
 
  b<bit-width>@<bit-offset>/<container-size>
 
+For $comm, the default type is "string"; any other type is invalid.
+
 
 Event Profiling
 ---------------
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 9c150b153782..cfb8f7ce5cf6 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -1235,8 +1235,8 @@ DECLARE_EVENT_CLASS(nfs4_idmap_event,
                                len = 0;
                        __entry->error = error < 0 ? error : 0;
                        __entry->id = id;
-                       memcpy(__get_dynamic_array(name), name, len);
-                       ((char *)__get_dynamic_array(name))[len] = 0;
+                       memcpy(__get_str(name), name, len);
+                       __get_str(name)[len] = 0;
                ),
 
                TP_printk(
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 0b9e5cc9a747..31c7763b94d5 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -707,9 +707,9 @@ TRACE_EVENT(nfs_sillyrename_unlink,
                        __entry->dev = dir->i_sb->s_dev;
                        __entry->dir = NFS_FILEID(dir);
                        __entry->error = error;
-                       memcpy(__get_dynamic_array(name),
+                       memcpy(__get_str(name),
                                data->args.name.name, len);
-                       ((char *)__get_dynamic_array(name))[len] = 0;
+                       __get_str(name)[len] = 0;
                ),
 
                TP_printk(
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 66a36a815f0a..7d565afe35d2 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -754,23 +754,27 @@ static inline void ftrace_init(void) { }
 
 /*
  * Structure that defines an entry function trace.
+ * It's already packed but the attribute "packed" is needed
+ * to remove extra padding at the end.
  */
 struct ftrace_graph_ent {
        unsigned long func; /* Current function */
        int depth;
-};
+} __packed;
 
 /*
  * Structure that defines a return function trace.
+ * It's already packed but the attribute "packed" is needed
+ * to remove extra padding at the end.
  */
 struct ftrace_graph_ret {
        unsigned long func; /* Current function */
-       unsigned long long calltime;
-       unsigned long long rettime;
        /* Number of functions that overran the depth limit for current task */
        unsigned long overrun;
+       unsigned long long calltime;
+       unsigned long long rettime;
        int depth;
-};
+} __packed;
 
 /* Type of the callback handlers for tracing function graph*/
 typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 1443d79e4fe6..1791a12cfa85 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -147,7 +147,7 @@ TRACE_EVENT(mc_event,
                  __entry->error_count,
                  mc_event_error_type(__entry->error_type),
                  __entry->error_count > 1 ? "s" : "",
-                 ((char *)__get_str(msg))[0] ? " " : "",
+                 __get_str(msg)[0] ? " " : "",
                  __get_str(msg),
                  __get_str(label),
                  __entry->mc_index,
@@ -157,7 +157,7 @@ TRACE_EVENT(mc_event,
                  __entry->address,
                  1 << __entry->grain_bits,
                  __entry->syndrome,
-                 ((char *)__get_str(driver_detail))[0] ? " " : "",
+                 __get_str(driver_detail)[0] ? " " : "",
                  __get_str(driver_detail))
 );
 
diff --git a/include/trace/events/printk.h b/include/trace/events/printk.h
index c008bc99f9fa..f350170059c6 100644
--- a/include/trace/events/printk.h
+++ b/include/trace/events/printk.h
@@ -16,8 +16,16 @@ TRACE_EVENT(console,
        ),
 
        TP_fast_assign(
-               memcpy(__get_dynamic_array(msg), text, len);
-               ((char *)__get_dynamic_array(msg))[len] = 0;
+               /*
+                * Each trace entry is printed in a new line.
+                * If the msg finishes with '\n', cut it off
+                * to avoid blank lines in the trace.
+                */
+               if ((len > 0) && (text[len-1] == '\n'))
+                       len -= 1;
+
+               memcpy(__get_str(msg), text, len);
+               __get_str(msg)[len] = 0;
        ),
 
        TP_printk("%s", __get_str(msg))
diff --git a/include/trace/perf.h b/include/trace/perf.h
index 88de5c205e86..04fe68bbe767 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -15,7 +15,7 @@
                ((__entry->__data_loc_##field >> 16) & 0xffff)
 
 #undef __get_str
-#define __get_str(field) (char *)__get_dynamic_array(field)
+#define __get_str(field) ((char *)__get_dynamic_array(field))
 
 #undef __get_bitmask
 #define __get_bitmask(field) (char *)__get_dynamic_array(field)
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 80679a9fae65..467e12f780d8 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -256,7 +256,7 @@ TRACE_MAKE_SYSTEM_STR();
                ((__entry->__data_loc_##field >> 16) & 0xffff)
 
 #undef __get_str
-#define __get_str(field) (char *)__get_dynamic_array(field)
+#define __get_str(field) ((char *)__get_dynamic_array(field))
 
 #undef __get_bitmask
 #define __get_bitmask(field)                                           \
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index fafeaf803bd0..f4b86e8ca1e7 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -542,6 +542,7 @@ config HIST_TRIGGERS
        bool "Histogram triggers"
        depends on ARCH_HAVE_NMI_SAFE_CMPXCHG
        select TRACING_MAP
+       select TRACING
        default n
        help
          Hist triggers allow one or more arbitrary trace event fields
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 900dbb1efff2..84752c8e28b5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -89,16 +89,16 @@ struct ftrace_ops *function_trace_op __read_mostly = 
&ftrace_list_end;
 /* What to set function_trace_op to */
 static struct ftrace_ops *set_function_trace_op;
 
-/* List for set_ftrace_pid's pids. */
-LIST_HEAD(ftrace_pids);
-struct ftrace_pid {
-       struct list_head list;
-       struct pid *pid;
-};
-
-static bool ftrace_pids_enabled(void)
+static bool ftrace_pids_enabled(struct ftrace_ops *ops)
 {
-       return !list_empty(&ftrace_pids);
+       struct trace_array *tr;
+
+       if (!(ops->flags & FTRACE_OPS_FL_PID) || !ops->private)
+               return false;
+
+       tr = ops->private;
+
+       return tr->function_pids != NULL;
 }
 
 static void ftrace_update_trampoline(struct ftrace_ops *ops);
@@ -179,7 +179,9 @@ int ftrace_nr_registered_ops(void)
 static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
                            struct ftrace_ops *op, struct pt_regs *regs)
 {
-       if (!test_tsk_trace_trace(current))
+       struct trace_array *tr = op->private;
+
+       if (tr && this_cpu_read(tr->trace_buffer.data->ftrace_ignore_pid))
                return;
 
        op->saved_func(ip, parent_ip, op, regs);
@@ -417,7 +419,7 @@ static int __register_ftrace_function(struct ftrace_ops 
*ops)
        /* Always save the function, and reset at unregistering */
        ops->saved_func = ops->func;
 
-       if (ops->flags & FTRACE_OPS_FL_PID && ftrace_pids_enabled())
+       if (ftrace_pids_enabled(ops))
                ops->func = ftrace_pid_func;
 
        ftrace_update_trampoline(ops);
@@ -450,7 +452,6 @@ static int __unregister_ftrace_function(struct ftrace_ops 
*ops)
 
 static void ftrace_update_pid_func(void)
 {
-       bool enabled = ftrace_pids_enabled();
        struct ftrace_ops *op;
 
        /* Only do something if we are tracing something */
@@ -459,8 +460,8 @@ static void ftrace_update_pid_func(void)
 
        do_for_each_ftrace_op(op, ftrace_ops_list) {
                if (op->flags & FTRACE_OPS_FL_PID) {
-                       op->func = enabled ? ftrace_pid_func :
-                               op->saved_func;
+                       op->func = ftrace_pids_enabled(op) ?
+                               ftrace_pid_func : op->saved_func;
                        ftrace_update_trampoline(op);
                }
        } while_for_each_ftrace_op(op);
@@ -5324,179 +5325,99 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops 
*ops)
        return ops->func;
 }
 
-static void clear_ftrace_swapper(void)
+static void
+ftrace_filter_pid_sched_switch_probe(void *data, bool preempt,
+                   struct task_struct *prev, struct task_struct *next)
 {
-       struct task_struct *p;
-       int cpu;
+       struct trace_array *tr = data;
+       struct trace_pid_list *pid_list;
 
-       get_online_cpus();
-       for_each_online_cpu(cpu) {
-               p = idle_task(cpu);
-               clear_tsk_trace_trace(p);
-       }
-       put_online_cpus();
-}
-
-static void set_ftrace_swapper(void)
-{
-       struct task_struct *p;
-       int cpu;
+       pid_list = rcu_dereference_sched(tr->function_pids);
 
-       get_online_cpus();
-       for_each_online_cpu(cpu) {
-               p = idle_task(cpu);
-               set_tsk_trace_trace(p);
-       }
-       put_online_cpus();
+       this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid,
+                      trace_ignore_this_task(pid_list, next));
 }
 
-static void clear_ftrace_pid(struct pid *pid)
+static void clear_ftrace_pids(struct trace_array *tr)
 {
-       struct task_struct *p;
+       struct trace_pid_list *pid_list;
+       int cpu;
 
-       rcu_read_lock();
-       do_each_pid_task(pid, PIDTYPE_PID, p) {
-               clear_tsk_trace_trace(p);
-       } while_each_pid_task(pid, PIDTYPE_PID, p);
-       rcu_read_unlock();
+       pid_list = rcu_dereference_protected(tr->function_pids,
+                                            lockdep_is_held(&ftrace_lock));
+       if (!pid_list)
+               return;
 
-       put_pid(pid);
-}
+       unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr);
 
-static void set_ftrace_pid(struct pid *pid)
-{
-       struct task_struct *p;
+       for_each_possible_cpu(cpu)
+               per_cpu_ptr(tr->trace_buffer.data, cpu)->ftrace_ignore_pid = 
false;
 
-       rcu_read_lock();
-       do_each_pid_task(pid, PIDTYPE_PID, p) {
-               set_tsk_trace_trace(p);
-       } while_each_pid_task(pid, PIDTYPE_PID, p);
-       rcu_read_unlock();
-}
+       rcu_assign_pointer(tr->function_pids, NULL);
 
-static void clear_ftrace_pid_task(struct pid *pid)
-{
-       if (pid == ftrace_swapper_pid)
-               clear_ftrace_swapper();
-       else
-               clear_ftrace_pid(pid);
-}
+       /* Wait till all users are no longer using pid filtering */
+       synchronize_sched();
 
-static void set_ftrace_pid_task(struct pid *pid)
-{
-       if (pid == ftrace_swapper_pid)
-               set_ftrace_swapper();
-       else
-               set_ftrace_pid(pid);
+       trace_free_pid_list(pid_list);
 }
 
-static int ftrace_pid_add(int p)
+static void ftrace_pid_reset(struct trace_array *tr)
 {
-       struct pid *pid;
-       struct ftrace_pid *fpid;
-       int ret = -EINVAL;
-
        mutex_lock(&ftrace_lock);
-
-       if (!p)
-               pid = ftrace_swapper_pid;
-       else
-               pid = find_get_pid(p);
-
-       if (!pid)
-               goto out;
-
-       ret = 0;
-
-       list_for_each_entry(fpid, &ftrace_pids, list)
-               if (fpid->pid == pid)
-                       goto out_put;
-
-       ret = -ENOMEM;
-
-       fpid = kmalloc(sizeof(*fpid), GFP_KERNEL);
-       if (!fpid)
-               goto out_put;
-
-       list_add(&fpid->list, &ftrace_pids);
-       fpid->pid = pid;
-
-       set_ftrace_pid_task(pid);
+       clear_ftrace_pids(tr);
 
        ftrace_update_pid_func();
-
        ftrace_startup_all(0);
 
        mutex_unlock(&ftrace_lock);
-       return 0;
-
-out_put:
-       if (pid != ftrace_swapper_pid)
-               put_pid(pid);
-
-out:
-       mutex_unlock(&ftrace_lock);
-       return ret;
 }
 
-static void ftrace_pid_reset(void)
-{
-       struct ftrace_pid *fpid, *safe;
-
-       mutex_lock(&ftrace_lock);
-       list_for_each_entry_safe(fpid, safe, &ftrace_pids, list) {
-               struct pid *pid = fpid->pid;
-
-               clear_ftrace_pid_task(pid);
-
-               list_del(&fpid->list);
-               kfree(fpid);
-       }
-
-       ftrace_update_pid_func();
-       ftrace_startup_all(0);
-
-       mutex_unlock(&ftrace_lock);
-}
+/* Greater than any max PID */
+#define FTRACE_NO_PIDS         (void *)(PID_MAX_LIMIT + 1)
 
 static void *fpid_start(struct seq_file *m, loff_t *pos)
+       __acquires(RCU)
 {
+       struct trace_pid_list *pid_list;
+       struct trace_array *tr = m->private;
+
        mutex_lock(&ftrace_lock);
+       rcu_read_lock_sched();
 
-       if (!ftrace_pids_enabled() && (!*pos))
-               return (void *) 1;
+       pid_list = rcu_dereference_sched(tr->function_pids);
 
-       return seq_list_start(&ftrace_pids, *pos);
+       if (!pid_list)
+               return !(*pos) ? FTRACE_NO_PIDS : NULL;
+
+       return trace_pid_start(pid_list, pos);
 }
 
 static void *fpid_next(struct seq_file *m, void *v, loff_t *pos)
 {
-       if (v == (void *)1)
+       struct trace_array *tr = m->private;
+       struct trace_pid_list *pid_list = 
rcu_dereference_sched(tr->function_pids);
+
+       if (v == FTRACE_NO_PIDS)
                return NULL;
 
-       return seq_list_next(v, &ftrace_pids, pos);
+       return trace_pid_next(pid_list, v, pos);
 }
 
 static void fpid_stop(struct seq_file *m, void *p)
+       __releases(RCU)
 {
+       rcu_read_unlock_sched();
        mutex_unlock(&ftrace_lock);
 }
 
 static int fpid_show(struct seq_file *m, void *v)
 {
-       const struct ftrace_pid *fpid = list_entry(v, struct ftrace_pid, list);
-
-       if (v == (void *)1) {
+       if (v == FTRACE_NO_PIDS) {
                seq_puts(m, "no pid\n");
                return 0;
        }
 
-       if (fpid->pid == ftrace_swapper_pid)
-               seq_puts(m, "swapper tasks\n");
-       else
-               seq_printf(m, "%u\n", pid_vnr(fpid->pid));
-
-       return 0;
+       return trace_pid_show(m, v);
 }
 
 static const struct seq_operations ftrace_pid_sops = {
@@ -5509,58 +5430,103 @@ static const struct seq_operations ftrace_pid_sops = {
 static int
 ftrace_pid_open(struct inode *inode, struct file *file)
 {
+       struct trace_array *tr = inode->i_private;
+       struct seq_file *m;
        int ret = 0;
 
+       if (trace_array_get(tr) < 0)
+               return -ENODEV;
+
        if ((file->f_mode & FMODE_WRITE) &&
            (file->f_flags & O_TRUNC))
-               ftrace_pid_reset();
+               ftrace_pid_reset(tr);
 
-       if (file->f_mode & FMODE_READ)
-               ret = seq_open(file, &ftrace_pid_sops);
+       ret = seq_open(file, &ftrace_pid_sops);
+       if (ret < 0) {
+               trace_array_put(tr);
+       } else {
+               m = file->private_data;
+               /* copy tr over to seq ops */
+               m->private = tr;
+       }
 
        return ret;
 }
 
+static void ignore_task_cpu(void *data)
+{
+       struct trace_array *tr = data;
+       struct trace_pid_list *pid_list;
+
+       /*
+        * This function is called by on_each_cpu() while the
+        * ftrace_lock is held.
+        */
+       pid_list = rcu_dereference_protected(tr->function_pids,
+                                            mutex_is_locked(&ftrace_lock));
+
+       this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid,
+                      trace_ignore_this_task(pid_list, current));
+}
+
 static ssize_t
 ftrace_pid_write(struct file *filp, const char __user *ubuf,
                   size_t cnt, loff_t *ppos)
 {
-       char buf[64], *tmp;
-       long val;
-       int ret;
+       struct seq_file *m = filp->private_data;
+       struct trace_array *tr = m->private;
+       struct trace_pid_list *filtered_pids = NULL;
+       struct trace_pid_list *pid_list;
+       ssize_t ret;
 
-       if (cnt >= sizeof(buf))
-               return -EINVAL;
+       if (!cnt)
+               return 0;
+
+       mutex_lock(&ftrace_lock);
+
+       filtered_pids = rcu_dereference_protected(tr->function_pids,
+                                            lockdep_is_held(&ftrace_lock));
+
+       ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
+       if (ret < 0)
+               goto out;
 
-       if (copy_from_user(&buf, ubuf, cnt))
-               return -EFAULT;
+       rcu_assign_pointer(tr->function_pids, pid_list);
 
-       buf[cnt] = 0;
+       if (filtered_pids) {
+               synchronize_sched();
+               trace_free_pid_list(filtered_pids);
+       } else if (pid_list) {
+               /* Register a probe to set whether to ignore the tracing of a 
task */
+               
register_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr);
+       }
 
        /*
-        * Allow "echo > set_ftrace_pid" or "echo -n '' > set_ftrace_pid"
-        * to clean the filter quietly.
+        * Ignoring of pids is done at task switch. But we have to
+        * check for those tasks that are currently running.
+        * Always do this in case a pid was appended or removed.
         */
-       tmp = strstrip(buf);
-       if (strlen(tmp) == 0)
-               return 1;
+       on_each_cpu(ignore_task_cpu, tr, 1);
 
-       ret = kstrtol(tmp, 10, &val);
-       if (ret < 0)
-               return ret;
+       ftrace_update_pid_func();
+       ftrace_startup_all(0);
+ out:
+       mutex_unlock(&ftrace_lock);
 
-       ret = ftrace_pid_add(val);
+       if (ret > 0)
+               *ppos += ret;
 
-       return ret ? ret : cnt;
+       return ret;
 }
 
 static int
 ftrace_pid_release(struct inode *inode, struct file *file)
 {
-       if (file->f_mode & FMODE_READ)
-               seq_release(inode, file);
+       struct trace_array *tr = inode->i_private;
 
-       return 0;
+       trace_array_put(tr);
+
+       return seq_release(inode, file);
 }
 
 static const struct file_operations ftrace_pid_fops = {
@@ -5571,24 +5537,21 @@ static const struct file_operations ftrace_pid_fops = {
        .release        = ftrace_pid_release,
 };
 
-static __init int ftrace_init_tracefs(void)
+void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 {
-       struct dentry *d_tracer;
+       trace_create_file("set_ftrace_pid", 0644, d_tracer,
+                           tr, &ftrace_pid_fops);
+}
 
-       d_tracer = tracing_init_dentry();
-       if (IS_ERR(d_tracer))
-               return 0;
+void __init ftrace_init_tracefs_toplevel(struct trace_array *tr,
+                                        struct dentry *d_tracer)
+{
+       /* Only the top level directory has the dyn_tracefs and profile */
+       WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL));
 
        ftrace_init_dyn_tracefs(d_tracer);
-
-       trace_create_file("set_ftrace_pid", 0644, d_tracer,
-                           NULL, &ftrace_pid_fops);
-
        ftrace_profile_tracefs(d_tracer);
-
-       return 0;
 }
-fs_initcall(ftrace_init_tracefs);
 
 /**
  * ftrace_kill - kill ftrace
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8a4bd6b68a0b..dade4c9559cc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -25,7 +25,7 @@
 #include <linux/hardirq.h>
 #include <linux/linkage.h>
 #include <linux/uaccess.h>
-#include <linux/kprobes.h>
+#include <linux/vmalloc.h>
 #include <linux/ftrace.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
@@ -319,6 +319,258 @@ int call_filter_check_discard(struct trace_event_call 
*call, void *rec,
        return 0;
 }
 
+void trace_free_pid_list(struct trace_pid_list *pid_list)
+{
+       vfree(pid_list->pids);
+       kfree(pid_list);
+}
+
+/**
+ * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
+ * @filtered_pids: The list of pids to check
+ * @search_pid: The PID to find in @filtered_pids
+ *
+ * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
+ */
+bool
+trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
+{
+       /*
+        * If pid_max changed after filtered_pids was created, we
+        * by default ignore all pids greater than the previous pid_max.
+        */
+       if (search_pid >= filtered_pids->pid_max)
+               return false;
+
+       return test_bit(search_pid, filtered_pids->pids);
+}
+
+/**
+ * trace_ignore_this_task - should a task be ignored for tracing
+ * @filtered_pids: The list of pids to check
+ * @task: The task that should be ignored if not filtered
+ *
+ * Checks if @task should be traced or not from @filtered_pids.
+ * Returns true if @task should *NOT* be traced.
+ * Returns false if @task should be traced.
+ */
+bool
+trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct 
task_struct *task)
+{
+       /*
+        * Return false, because if filtered_pids does not exist,
+        * all pids are good to trace.
+        */
+       if (!filtered_pids)
+               return false;
+
+       return !trace_find_filtered_pid(filtered_pids, task->pid);
+}
+
+/**
+ * trace_filter_add_remove_task - Add or remove a task from a pid_list
+ * @pid_list: The list to modify
+ * @self: The current task for fork or NULL for exit
+ * @task: The task to add or remove
+ *
+ * If adding a task, if @self is defined, the task is only added if @self
+ * is also included in @pid_list. This happens on fork and tasks should
+ * only be added when the parent is listed. If @self is NULL, then the
+ * @task pid will be removed from the list, which would happen on exit
+ * of a task.
+ */
+void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
+                                 struct task_struct *self,
+                                 struct task_struct *task)
+{
+       if (!pid_list)
+               return;
+
+       /* For forks, we only add if the forking task is listed */
+       if (self) {
+               if (!trace_find_filtered_pid(pid_list, self->pid))
+                       return;
+       }
+
+       /* Sorry, but we don't support pid_max changing after setting */
+       if (task->pid >= pid_list->pid_max)
+               return;
+
+       /* "self" is set for forks, and NULL for exits */
+       if (self)
+               set_bit(task->pid, pid_list->pids);
+       else
+               clear_bit(task->pid, pid_list->pids);
+}
+
+/**
+ * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
+ * @pid_list: The pid list to show
+ * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
+ * @pos: The position of the file
+ *
+ * This is used by the seq_file "next" operation to iterate the pids
+ * listed in a trace_pid_list structure.
+ *
+ * Returns the pid+1 as we want to display pid of zero, but NULL would
+ * stop the iteration.
+ */
+void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
+{
+       unsigned long pid = (unsigned long)v;
+
+       (*pos)++;
+
+       /* pid already is +1 of the actual previous bit */
+       pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
+
+       /* Return pid + 1 to allow zero to be represented */
+       if (pid < pid_list->pid_max)
+               return (void *)(pid + 1);
+
+       return NULL;
+}
+
+/**
+ * trace_pid_start - Used for seq_file to start reading pid lists
+ * @pid_list: The pid list to show
+ * @pos: The position of the file
+ *
+ * This is used by seq_file "start" operation to start the iteration
+ * of listing pids.
+ *
+ * Returns the pid+1 as we want to display pid of zero, but NULL would
+ * stop the iteration.
+ */
+void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
+{
+       unsigned long pid;
+       loff_t l = 0;
+
+       pid = find_first_bit(pid_list->pids, pid_list->pid_max);
+       if (pid >= pid_list->pid_max)
+               return NULL;
+
+       /* Return pid + 1 so that zero can be the exit value */
+       for (pid++; pid && l < *pos;
+            pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
+               ;
+       return (void *)pid;
+}
+
+/**
+ * trace_pid_show - show the current pid in seq_file processing
+ * @m: The seq_file structure to write into
+ * @v: A void pointer of the pid (+1) value to display
+ *
+ * Can be directly used by seq_file operations to display the current
+ * pid value.
+ */
+int trace_pid_show(struct seq_file *m, void *v)
+{
+       unsigned long pid = (unsigned long)v - 1;
+
+       seq_printf(m, "%lu\n", pid);
+       return 0;
+}
+
+/* 128 should be much more than enough */
+#define PID_BUF_SIZE           127
+
+int trace_pid_write(struct trace_pid_list *filtered_pids,
+                   struct trace_pid_list **new_pid_list,
+                   const char __user *ubuf, size_t cnt)
+{
+       struct trace_pid_list *pid_list;
+       struct trace_parser parser;
+       unsigned long val;
+       int nr_pids = 0;
+       ssize_t read = 0;
+       ssize_t ret = 0;
+       loff_t pos;
+       pid_t pid;
+
+       if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
+               return -ENOMEM;
+
+       /*
+        * Always recreate a new array. The write is an all or nothing
+        * operation. Always create a new array when adding new pids by
+        * the user. If the operation fails, then the current list is
+        * not modified.
+        */
+       pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
+       if (!pid_list)
+               return -ENOMEM;
+
+       pid_list->pid_max = READ_ONCE(pid_max);
+
+       /* Only truncating will shrink pid_max */
+       if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
+               pid_list->pid_max = filtered_pids->pid_max;
+
+       pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
+       if (!pid_list->pids) {
+               kfree(pid_list);
+               return -ENOMEM;
+       }
+
+       if (filtered_pids) {
+               /* copy the current bits to the new max */
+               for_each_set_bit(pid, filtered_pids->pids,
+                                filtered_pids->pid_max) {
+                       set_bit(pid, pid_list->pids);
+                       nr_pids++;
+               }
+       }
+
+       while (cnt > 0) {
+
+               pos = 0;
+
+               ret = trace_get_user(&parser, ubuf, cnt, &pos);
+               if (ret < 0 || !trace_parser_loaded(&parser))
+                       break;
+
+               read += ret;
+               ubuf += ret;
+               cnt -= ret;
+
+               parser.buffer[parser.idx] = 0;
+
+               ret = -EINVAL;
+               if (kstrtoul(parser.buffer, 0, &val))
+                       break;
+               if (val >= pid_list->pid_max)
+                       break;
+
+               pid = (pid_t)val;
+
+               set_bit(pid, pid_list->pids);
+               nr_pids++;
+
+               trace_parser_clear(&parser);
+               ret = 0;
+       }
+       trace_parser_put(&parser);
+
+       if (ret < 0) {
+               trace_free_pid_list(pid_list);
+               return ret;
+       }
+
+       if (!nr_pids) {
+               /* Cleared the list of pids */
+               trace_free_pid_list(pid_list);
+               read = ret;
+               pid_list = NULL;
+       }
+
+       *new_pid_list = pid_list;
+
+       return read;
+}
+
 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
 {
        u64 ts;
@@ -1862,7 +2114,17 @@ void trace_buffer_unlock_commit_regs(struct trace_array 
*tr,
 {
        __buffer_unlock_commit(buffer, event);
 
-       ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
+       /*
+        * If regs is not set, then skip the following callers:
+        *   trace_buffer_unlock_commit_regs
+        *   event_trigger_unlock_commit
+        *   trace_event_buffer_commit
+        *   trace_event_raw_event_sched_switch
+        * Note, we can still get here via blktrace, wakeup tracer
+        * and mmiotrace, but that's ok if they lose a function or
+        * two. They are not that meaningful.
+        */
+       ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
        ftrace_trace_userstack(buffer, flags, pc);
 }
 
@@ -1913,6 +2175,13 @@ static void __ftrace_trace_stack(struct ring_buffer 
*buffer,
        trace.skip              = skip;
 
        /*
+        * Add two, for this function and the call to save_stack_trace()
+        * If regs is set, then these functions will not be in the way.
+        */
+       if (!regs)
+               trace.skip += 2;
+
+       /*
         * Since events can happen in NMIs there's no safe way to
         * use the per cpu ftrace_stacks. We reserve it and if an interrupt
         * or NMI comes in, it will just have to use the default
@@ -2083,83 +2352,41 @@ static void __trace_userstack(struct trace_array *tr, 
unsigned long flags)
 
 /* created for use with alloc_percpu */
 struct trace_buffer_struct {
-       char buffer[TRACE_BUF_SIZE];
+       int nesting;
+       char buffer[4][TRACE_BUF_SIZE];
 };
 
 static struct trace_buffer_struct *trace_percpu_buffer;
-static struct trace_buffer_struct *trace_percpu_sirq_buffer;
-static struct trace_buffer_struct *trace_percpu_irq_buffer;
-static struct trace_buffer_struct *trace_percpu_nmi_buffer;
 
 /*
- * The buffer used is dependent on the context. There is a per cpu
- * buffer for normal context, softirq contex, hard irq context and
- * for NMI context. Thise allows for lockless recording.
- *
- * Note, if the buffers failed to be allocated, then this returns NULL
+ * This allows for lockless recording.  If we're nested too deeply, then
+ * this returns NULL.
  */
 static char *get_trace_buf(void)
 {
-       struct trace_buffer_struct *percpu_buffer;
-
-       /*
-        * If we have allocated per cpu buffers, then we do not
-        * need to do any locking.
-        */
-       if (in_nmi())
-               percpu_buffer = trace_percpu_nmi_buffer;
-       else if (in_irq())
-               percpu_buffer = trace_percpu_irq_buffer;
-       else if (in_softirq())
-               percpu_buffer = trace_percpu_sirq_buffer;
-       else
-               percpu_buffer = trace_percpu_buffer;
+       struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
 
-       if (!percpu_buffer)
+       if (!buffer || buffer->nesting >= 4)
                return NULL;
 
-       return this_cpu_ptr(&percpu_buffer->buffer[0]);
+       return &buffer->buffer[buffer->nesting++][0];
+}
+
+static void put_trace_buf(void)
+{
+       this_cpu_dec(trace_percpu_buffer->nesting);
 }
 
 static int alloc_percpu_trace_buffer(void)
 {
        struct trace_buffer_struct *buffers;
-       struct trace_buffer_struct *sirq_buffers;
-       struct trace_buffer_struct *irq_buffers;
-       struct trace_buffer_struct *nmi_buffers;
 
        buffers = alloc_percpu(struct trace_buffer_struct);
-       if (!buffers)
-               goto err_warn;
-
-       sirq_buffers = alloc_percpu(struct trace_buffer_struct);
-       if (!sirq_buffers)
-               goto err_sirq;
-
-       irq_buffers = alloc_percpu(struct trace_buffer_struct);
-       if (!irq_buffers)
-               goto err_irq;
-
-       nmi_buffers = alloc_percpu(struct trace_buffer_struct);
-       if (!nmi_buffers)
-               goto err_nmi;
+       if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
+               return -ENOMEM;
 
        trace_percpu_buffer = buffers;
-       trace_percpu_sirq_buffer = sirq_buffers;
-       trace_percpu_irq_buffer = irq_buffers;
-       trace_percpu_nmi_buffer = nmi_buffers;
-
        return 0;
-
- err_nmi:
-       free_percpu(irq_buffers);
- err_irq:
-       free_percpu(sirq_buffers);
- err_sirq:
-       free_percpu(buffers);
- err_warn:
-       WARN(1, "Could not allocate percpu trace_printk buffer");
-       return -ENOMEM;
 }
 
 static int buffers_allocated;
@@ -2250,7 +2477,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, 
va_list args)
        tbuffer = get_trace_buf();
        if (!tbuffer) {
                len = 0;
-               goto out;
+               goto out_nobuffer;
        }
 
        len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, 
args);
@@ -2276,6 +2503,9 @@ int trace_vbprintk(unsigned long ip, const char *fmt, 
va_list args)
        }
 
 out:
+       put_trace_buf();
+
+out_nobuffer:
        preempt_enable_notrace();
        unpause_graph_tracing();
 
@@ -2307,7 +2537,7 @@ __trace_array_vprintk(struct ring_buffer *buffer,
        tbuffer = get_trace_buf();
        if (!tbuffer) {
                len = 0;
-               goto out;
+               goto out_nobuffer;
        }
 
        len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
@@ -2326,7 +2556,11 @@ __trace_array_vprintk(struct ring_buffer *buffer,
                __buffer_unlock_commit(buffer, event);
                ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
        }
- out:
+
+out:
+       put_trace_buf();
+
+out_nobuffer:
        preempt_enable_notrace();
        unpause_graph_tracing();
 
@@ -6977,6 +7211,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry 
*d_tracer)
        for_each_tracing_cpu(cpu)
                tracing_init_tracefs_percpu(tr, cpu);
 
+       ftrace_init_tracefs(tr, d_tracer);
 }
 
 static struct vfsmount *trace_automount(void *ingore)
@@ -7130,6 +7365,7 @@ static __init int tracer_init_tracefs(void)
                return 0;
 
        init_tracer_tracefs(&global_trace, d_tracer);
+       ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
 
        trace_create_file("tracing_thresh", 0644, d_tracer,
                        &global_trace, &tracing_thresh_fops);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 5167c366d6b7..f783df416726 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -80,6 +80,12 @@ enum trace_type {
        FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
                     filter)
 
+#undef FTRACE_ENTRY_PACKED
+#define FTRACE_ENTRY_PACKED(name, struct_name, id, tstruct, print,     \
+                           filter)                                     \
+       FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+                    filter) __packed
+
 #include "trace_entries.h"
 
 /*
@@ -156,6 +162,9 @@ struct trace_array_cpu {
        char                    comm[TASK_COMM_LEN];
 
        bool                    ignore_pid;
+#ifdef CONFIG_FUNCTION_TRACER
+       bool                    ftrace_ignore_pid;
+#endif
 };
 
 struct tracer;
@@ -247,6 +256,7 @@ struct trace_array {
        int                     ref;
 #ifdef CONFIG_FUNCTION_TRACER
        struct ftrace_ops       *ops;
+       struct trace_pid_list   __rcu *function_pids;
        /* function tracing enabled */
        int                     function_enabled;
 #endif
@@ -628,6 +638,25 @@ extern unsigned long nsecs_to_usecs(unsigned long nsecs);
 
 extern unsigned long tracing_thresh;
 
+/* PID filtering */
+
+extern int pid_max;
+
+bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids,
+                            pid_t search_pid);
+bool trace_ignore_this_task(struct trace_pid_list *filtered_pids,
+                           struct task_struct *task);
+void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
+                                 struct task_struct *self,
+                                 struct task_struct *task);
+void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos);
+void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos);
+int trace_pid_show(struct seq_file *m, void *v);
+void trace_free_pid_list(struct trace_pid_list *pid_list);
+int trace_pid_write(struct trace_pid_list *filtered_pids,
+                   struct trace_pid_list **new_pid_list,
+                   const char __user *ubuf, size_t cnt);
+
 #ifdef CONFIG_TRACER_MAX_TRACE
 void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
 void update_max_tr_single(struct trace_array *tr,
@@ -821,12 +850,9 @@ extern struct list_head ftrace_pids;
 
 #ifdef CONFIG_FUNCTION_TRACER
 extern bool ftrace_filter_param __initdata;
-static inline int ftrace_trace_task(struct task_struct *task)
+static inline int ftrace_trace_task(struct trace_array *tr)
 {
-       if (list_empty(&ftrace_pids))
-               return 1;
-
-       return test_tsk_trace_trace(task);
+       return !this_cpu_read(tr->trace_buffer.data->ftrace_ignore_pid);
 }
 extern int ftrace_is_dead(void);
 int ftrace_create_function_files(struct trace_array *tr,
@@ -836,8 +862,11 @@ void ftrace_init_global_array_ops(struct trace_array *tr);
 void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func);
 void ftrace_reset_array_ops(struct trace_array *tr);
 int using_ftrace_ops_list_func(void);
+void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer);
+void ftrace_init_tracefs_toplevel(struct trace_array *tr,
+                                 struct dentry *d_tracer);
 #else
-static inline int ftrace_trace_task(struct task_struct *task)
+static inline int ftrace_trace_task(struct trace_array *tr)
 {
        return 1;
 }
@@ -852,6 +881,8 @@ static inline void ftrace_destroy_function_files(struct 
trace_array *tr) { }
 static inline __init void
 ftrace_init_global_array_ops(struct trace_array *tr) { }
 static inline void ftrace_reset_array_ops(struct trace_array *tr) { }
+static inline void ftrace_init_tracefs(struct trace_array *tr, struct dentry 
*d) { }
+static inline void ftrace_init_tracefs_toplevel(struct trace_array *tr, struct 
dentry *d) { }
 /* ftace_func_t type is not defined, use macro instead of static inline */
 #define ftrace_init_array_ops(tr, func) do { } while (0)
 #endif /* CONFIG_FUNCTION_TRACER */
@@ -1600,6 +1631,11 @@ int set_tracer_flag(struct trace_array *tr, unsigned int 
mask, int enabled);
 #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter)        
\
        FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \
                     filter)
+#undef FTRACE_ENTRY_PACKED
+#define FTRACE_ENTRY_PACKED(call, struct_name, id, tstruct, print, filter) \
+       FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+                    filter)
+
 #include "trace_entries.h"
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_FUNCTION_TRACER)
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index ee7b94a4810a..5c30efcda5e6 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -72,7 +72,7 @@ FTRACE_ENTRY_REG(function, ftrace_entry,
 );
 
 /* Function call entry */
-FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
+FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
 
        TRACE_GRAPH_ENT,
 
@@ -88,7 +88,7 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
 );
 
 /* Function return entry */
-FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
+FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
 
        TRACE_GRAPH_RET,
 
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 3d4155892a1e..03c0a48c3ac4 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -15,7 +15,6 @@
 #include <linux/kthread.h>
 #include <linux/tracefs.h>
 #include <linux/uaccess.h>
-#include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/sort.h>
@@ -262,6 +261,14 @@ void *trace_event_buffer_reserve(struct trace_event_buffer 
*fbuffer,
 
        local_save_flags(fbuffer->flags);
        fbuffer->pc = preempt_count();
+       /*
+        * If CONFIG_PREEMPT is enabled, then the tracepoint itself disables
+        * preemption (adding one to the preempt_count). Since we are
+        * interested in the preempt_count at the time the tracepoint was
+        * hit, we need to subtract one to offset the increment.
+        */
+       if (IS_ENABLED(CONFIG_PREEMPT))
+               fbuffer->pc--;
        fbuffer->trace_file = trace_file;
 
        fbuffer->event =
@@ -499,60 +506,6 @@ static void ftrace_clear_events(struct trace_array *tr)
        mutex_unlock(&event_mutex);
 }
 
-/* Shouldn't this be in a header? */
-extern int pid_max;
-
-/* Returns true if found in filter */
-static bool
-find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
-{
-       /*
-        * If pid_max changed after filtered_pids was created, we
-        * by default ignore all pids greater than the previous pid_max.
-        */
-       if (search_pid >= filtered_pids->pid_max)
-               return false;
-
-       return test_bit(search_pid, filtered_pids->pids);
-}
-
-static bool
-ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct 
*task)
-{
-       /*
-        * Return false, because if filtered_pids does not exist,
-        * all pids are good to trace.
-        */
-       if (!filtered_pids)
-               return false;
-
-       return !find_filtered_pid(filtered_pids, task->pid);
-}
-
-static void filter_add_remove_task(struct trace_pid_list *pid_list,
-                                  struct task_struct *self,
-                                  struct task_struct *task)
-{
-       if (!pid_list)
-               return;
-
-       /* For forks, we only add if the forking task is listed */
-       if (self) {
-               if (!find_filtered_pid(pid_list, self->pid))
-                       return;
-       }
-
-       /* Sorry, but we don't support pid_max changing after setting */
-       if (task->pid >= pid_list->pid_max)
-               return;
-
-       /* "self" is set for forks, and NULL for exits */
-       if (self)
-               set_bit(task->pid, pid_list->pids);
-       else
-               clear_bit(task->pid, pid_list->pids);
-}
-
 static void
 event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
 {
@@ -560,7 +513,7 @@ event_filter_pid_sched_process_exit(void *data, struct 
task_struct *task)
        struct trace_array *tr = data;
 
        pid_list = rcu_dereference_sched(tr->filtered_pids);
-       filter_add_remove_task(pid_list, NULL, task);
+       trace_filter_add_remove_task(pid_list, NULL, task);
 }
 
 static void
@@ -572,7 +525,7 @@ event_filter_pid_sched_process_fork(void *data,
        struct trace_array *tr = data;
 
        pid_list = rcu_dereference_sched(tr->filtered_pids);
-       filter_add_remove_task(pid_list, self, task);
+       trace_filter_add_remove_task(pid_list, self, task);
 }
 
 void trace_event_follow_fork(struct trace_array *tr, bool enable)
@@ -600,8 +553,8 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool 
preempt,
        pid_list = rcu_dereference_sched(tr->filtered_pids);
 
        this_cpu_write(tr->trace_buffer.data->ignore_pid,
-                      ignore_this_task(pid_list, prev) &&
-                      ignore_this_task(pid_list, next));
+                      trace_ignore_this_task(pid_list, prev) &&
+                      trace_ignore_this_task(pid_list, next));
 }
 
 static void
@@ -614,7 +567,7 @@ event_filter_pid_sched_switch_probe_post(void *data, bool 
preempt,
        pid_list = rcu_dereference_sched(tr->filtered_pids);
 
        this_cpu_write(tr->trace_buffer.data->ignore_pid,
-                      ignore_this_task(pid_list, next));
+                      trace_ignore_this_task(pid_list, next));
 }
 
 static void
@@ -630,7 +583,7 @@ event_filter_pid_sched_wakeup_probe_pre(void *data, struct 
task_struct *task)
        pid_list = rcu_dereference_sched(tr->filtered_pids);
 
        this_cpu_write(tr->trace_buffer.data->ignore_pid,
-                      ignore_this_task(pid_list, task));
+                      trace_ignore_this_task(pid_list, task));
 }
 
 static void
@@ -647,7 +600,7 @@ event_filter_pid_sched_wakeup_probe_post(void *data, struct 
task_struct *task)
 
        /* Set tracing if current is enabled */
        this_cpu_write(tr->trace_buffer.data->ignore_pid,
-                      ignore_this_task(pid_list, current));
+                      trace_ignore_this_task(pid_list, current));
 }
 
 static void __ftrace_clear_event_pids(struct trace_array *tr)
@@ -685,8 +638,7 @@ static void __ftrace_clear_event_pids(struct trace_array 
*tr)
        /* Wait till all users are no longer using pid filtering */
        synchronize_sched();
 
-       vfree(pid_list->pids);
-       kfree(pid_list);
+       trace_free_pid_list(pid_list);
 }
 
 static void ftrace_clear_event_pids(struct trace_array *tr)
@@ -1034,18 +986,8 @@ p_next(struct seq_file *m, void *v, loff_t *pos)
 {
        struct trace_array *tr = m->private;
        struct trace_pid_list *pid_list = 
rcu_dereference_sched(tr->filtered_pids);
-       unsigned long pid = (unsigned long)v;
-
-       (*pos)++;
-
-       /* pid already is +1 of the actual prevous bit */
-       pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
 
-       /* Return pid + 1 to allow zero to be represented */
-       if (pid < pid_list->pid_max)
-               return (void *)(pid + 1);
-
-       return NULL;
+       return trace_pid_next(pid_list, v, pos);
 }
 
 static void *p_start(struct seq_file *m, loff_t *pos)
@@ -1053,8 +995,6 @@ static void *p_start(struct seq_file *m, loff_t *pos)
 {
        struct trace_pid_list *pid_list;
        struct trace_array *tr = m->private;
-       unsigned long pid;
-       loff_t l = 0;
 
        /*
         * Grab the mutex, to keep calls to p_next() having the same
@@ -1070,15 +1010,7 @@ static void *p_start(struct seq_file *m, loff_t *pos)
        if (!pid_list)
                return NULL;
 
-       pid = find_first_bit(pid_list->pids, pid_list->pid_max);
-       if (pid >= pid_list->pid_max)
-               return NULL;
-
-       /* Return pid + 1 so that zero can be the exit value */
-       for (pid++; pid && l < *pos;
-            pid = (unsigned long)p_next(m, (void *)pid, &l))
-               ;
-       return (void *)pid;
+       return trace_pid_start(pid_list, pos);
 }
 
 static void p_stop(struct seq_file *m, void *p)
@@ -1088,14 +1020,6 @@ static void p_stop(struct seq_file *m, void *p)
        mutex_unlock(&event_mutex);
 }
 
-static int p_show(struct seq_file *m, void *v)
-{
-       unsigned long pid = (unsigned long)v - 1;
-
-       seq_printf(m, "%lu\n", pid);
-       return 0;
-}
-
 static ssize_t
 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
                  loff_t *ppos)
@@ -1654,7 +1578,7 @@ static void ignore_task_cpu(void *data)
                                             mutex_is_locked(&event_mutex));
 
        this_cpu_write(tr->trace_buffer.data->ignore_pid,
-                      ignore_this_task(pid_list, current));
+                      trace_ignore_this_task(pid_list, current));
 }
 
 static ssize_t
@@ -1666,13 +1590,7 @@ ftrace_event_pid_write(struct file *filp, const char 
__user *ubuf,
        struct trace_pid_list *filtered_pids = NULL;
        struct trace_pid_list *pid_list;
        struct trace_event_file *file;
-       struct trace_parser parser;
-       unsigned long val;
-       loff_t this_pos;
-       ssize_t read = 0;
-       ssize_t ret = 0;
-       pid_t pid;
-       int nr_pids = 0;
+       ssize_t ret;
 
        if (!cnt)
                return 0;
@@ -1681,93 +1599,15 @@ ftrace_event_pid_write(struct file *filp, const char 
__user *ubuf,
        if (ret < 0)
                return ret;
 
-       if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
-               return -ENOMEM;
-
        mutex_lock(&event_mutex);
+
        filtered_pids = rcu_dereference_protected(tr->filtered_pids,
                                             lockdep_is_held(&event_mutex));
 
-       /*
-        * Always recreate a new array. The write is an all or nothing
-        * operation. Always create a new array when adding new pids by
-        * the user. If the operation fails, then the current list is
-        * not modified.
-        */
-       pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
-       if (!pid_list) {
-               read = -ENOMEM;
-               goto out;
-       }
-       pid_list->pid_max = READ_ONCE(pid_max);
-       /* Only truncating will shrink pid_max */
-       if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
-               pid_list->pid_max = filtered_pids->pid_max;
-       pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
-       if (!pid_list->pids) {
-               kfree(pid_list);
-               read = -ENOMEM;
-               goto out;
-       }
-       if (filtered_pids) {
-               /* copy the current bits to the new max */
-               pid = find_first_bit(filtered_pids->pids,
-                                    filtered_pids->pid_max);
-               while (pid < filtered_pids->pid_max) {
-                       set_bit(pid, pid_list->pids);
-                       pid = find_next_bit(filtered_pids->pids,
-                                           filtered_pids->pid_max,
-                                           pid + 1);
-                       nr_pids++;
-               }
-       }
-
-       while (cnt > 0) {
-
-               this_pos = 0;
-
-               ret = trace_get_user(&parser, ubuf, cnt, &this_pos);
-               if (ret < 0 || !trace_parser_loaded(&parser))
-                       break;
-
-               read += ret;
-               ubuf += ret;
-               cnt -= ret;
-
-               parser.buffer[parser.idx] = 0;
-
-               ret = -EINVAL;
-               if (kstrtoul(parser.buffer, 0, &val))
-                       break;
-               if (val >= pid_list->pid_max)
-                       break;
-
-               pid = (pid_t)val;
-
-               set_bit(pid, pid_list->pids);
-               nr_pids++;
-
-               trace_parser_clear(&parser);
-               ret = 0;
-       }
-       trace_parser_put(&parser);
-
-       if (ret < 0) {
-               vfree(pid_list->pids);
-               kfree(pid_list);
-               read = ret;
+       ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
+       if (ret < 0)
                goto out;
-       }
 
-       if (!nr_pids) {
-               /* Cleared the list of pids */
-               vfree(pid_list->pids);
-               kfree(pid_list);
-               read = ret;
-               if (!filtered_pids)
-                       goto out;
-               pid_list = NULL;
-       }
        rcu_assign_pointer(tr->filtered_pids, pid_list);
 
        list_for_each_entry(file, &tr->events, list) {
@@ -1776,10 +1616,8 @@ ftrace_event_pid_write(struct file *filp, const char 
__user *ubuf,
 
        if (filtered_pids) {
                synchronize_sched();
-
-               vfree(filtered_pids->pids);
-               kfree(filtered_pids);
-       } else {
+               trace_free_pid_list(filtered_pids);
+       } else if (pid_list) {
                /*
                 * Register a probe that is called before all other probes
                 * to set ignore_pid if next or prev do not match.
@@ -1817,9 +1655,8 @@ ftrace_event_pid_write(struct file *filp, const char 
__user *ubuf,
  out:
        mutex_unlock(&event_mutex);
 
-       ret = read;
-       if (read > 0)
-               *ppos += read;
+       if (ret > 0)
+               *ppos += ret;
 
        return ret;
 }
@@ -1846,7 +1683,7 @@ static const struct seq_operations show_set_event_seq_ops 
= {
 static const struct seq_operations show_set_pid_seq_ops = {
        .start = p_start,
        .next = p_next,
-       .show = p_show,
+       .show = trace_pid_show,
        .stop = p_stop,
 };
 
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 5a095c2e4b69..0efa00d80623 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -43,7 +43,7 @@ static int allocate_ftrace_ops(struct trace_array *tr)
 
        /* Currently only the non stack verision is supported */
        ops->func = function_trace_call;
-       ops->flags = FTRACE_OPS_FL_RECURSION_SAFE;
+       ops->flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_PID;
 
        tr->ops = ops;
        ops->private = tr;
diff --git a/kernel/trace/trace_functions_graph.c 
b/kernel/trace/trace_functions_graph.c
index 3a0244ff7ea8..7363ccf79512 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -319,7 +319,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
        int cpu;
        int pc;
 
-       if (!ftrace_trace_task(current))
+       if (!ftrace_trace_task(tr))
                return 0;
 
        /* trace it when it is-nested-in or is a function enabled. */
@@ -338,6 +338,13 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
        if (ftrace_graph_notrace_addr(trace->func))
                return 1;
 
+       /*
+        * Stop here if tracing_threshold is set. We only write function return
+        * events to the ring buffer.
+        */
+       if (tracing_thresh)
+               return 1;
+
        local_irq_save(flags);
        cpu = raw_smp_processor_id();
        data = per_cpu_ptr(tr->trace_buffer.data, cpu);
@@ -355,14 +362,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
        return ret;
 }
 
-static int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
-{
-       if (tracing_thresh)
-               return 1;
-       else
-               return trace_graph_entry(trace);
-}
-
 static void
 __trace_graph_function(struct trace_array *tr,
                unsigned long ip, unsigned long flags, int pc)
@@ -457,7 +456,7 @@ static int graph_trace_init(struct trace_array *tr)
        set_graph_array(tr);
        if (tracing_thresh)
                ret = register_ftrace_graph(&trace_graph_thresh_return,
-                                           &trace_graph_thresh_entry);
+                                           &trace_graph_entry);
        else
                ret = register_ftrace_graph(&trace_graph_return,
                                            &trace_graph_entry);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5546eec0505f..9aedb0b06683 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -587,6 +587,7 @@ static int create_trace_kprobe(int argc, char **argv)
         *  $retval     : fetch return value
         *  $stack      : fetch stack address
         *  $stackN     : fetch Nth of stack (N:0-)
+        *  $comm       : fetch current task comm
         *  @ADDR       : fetch memory at ADDR (ADDR should be in kernel)
         *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
         *  %REG        : fetch register REG
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 68f376ca6d3f..cd7480d0a201 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -68,19 +68,15 @@ static void mmio_print_pcidev(struct trace_seq *s, const 
struct pci_dev *dev)
        trace_seq_printf(s, "PCIDEV %02x%02x %04x%04x %x",
                         dev->bus->number, dev->devfn,
                         dev->vendor, dev->device, dev->irq);
-       /*
-        * XXX: is pci_resource_to_user() appropriate, since we are
-        * supposed to interpret the __ioremap() phys_addr argument based on
-        * these printed values?
-        */
        for (i = 0; i < 7; i++) {
-               pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
+               start = dev->resource[i].start;
                trace_seq_printf(s, " %llx",
                        (unsigned long long)(start |
                        (dev->resource[i].flags & PCI_REGION_FLAG_MASK)));
        }
        for (i = 0; i < 7; i++) {
-               pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
+               start = dev->resource[i].start;
+               end = dev->resource[i].end;
                trace_seq_printf(s, " %llx",
                        dev->resource[i].start < dev->resource[i].end ?
                        (unsigned long long)(end - start) + 1 : 0);
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 1d372fa6fefb..74e80a582c28 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -218,6 +218,28 @@ free_bitfield_fetch_param(struct bitfield_fetch_param 
*data)
        kfree(data);
 }
 
+void FETCH_FUNC_NAME(comm, string)(struct pt_regs *regs,
+                                         void *data, void *dest)
+{
+       int maxlen = get_rloc_len(*(u32 *)dest);
+       u8 *dst = get_rloc_data(dest);
+       long ret;
+
+       if (!maxlen)
+               return;
+
+       ret = strlcpy(dst, current->comm, maxlen);
+       *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest));
+}
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(comm, string));
+
+void FETCH_FUNC_NAME(comm, string_size)(struct pt_regs *regs,
+                                              void *data, void *dest)
+{
+       *(u32 *)dest = strlen(current->comm) + 1;
+}
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(comm, string_size));
+
 static const struct fetch_type *find_fetch_type(const char *type,
                                                const struct fetch_type *ftbl)
 {
@@ -348,6 +370,11 @@ static int parse_probe_vars(char *arg, const struct 
fetch_type *t,
                        }
                } else
                        ret = -EINVAL;
+       } else if (strcmp(arg, "comm") == 0) {
+               if (strcmp(t->name, "string") != 0 &&
+                   strcmp(t->name, "string_size") != 0)
+                       return -EINVAL;
+               f->fn = t->fetch[FETCH_MTD_comm];
        } else
                ret = -EINVAL;
 
@@ -522,6 +549,12 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
                arg[t - parg->comm] = '\0';
                t++;
        }
+       /*
+        * The default type of $comm should be "string", and it can't be
+        * dereferenced.
+        */
+       if (!t && strcmp(arg, "$comm") == 0)
+               t = "string";
        parg->type = find_fetch_type(t, ftbl);
        if (!parg->type) {
                pr_info("Unsupported type: %s\n", t);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index f6398db09114..45400ca5ded1 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -102,6 +102,7 @@ enum {
        FETCH_MTD_reg = 0,
        FETCH_MTD_stack,
        FETCH_MTD_retval,
+       FETCH_MTD_comm,
        FETCH_MTD_memory,
        FETCH_MTD_symbol,
        FETCH_MTD_deref,
@@ -183,6 +184,14 @@ DECLARE_BASIC_FETCH_FUNCS(bitfield);
 #define fetch_bitfield_string                  NULL
 #define fetch_bitfield_string_size             NULL
 
+/* comm only makes sense as a string */
+#define fetch_comm_u8          NULL
+#define fetch_comm_u16         NULL
+#define fetch_comm_u32         NULL
+#define fetch_comm_u64         NULL
+DECLARE_FETCH_FUNC(comm, string);
+DECLARE_FETCH_FUNC(comm, string_size);
+
 /*
  * Define macro for basic types - we don't need to define s* types, because
  * we have to care only about bitwidth at recording time.
@@ -213,6 +222,7 @@ DEFINE_FETCH_##method(u64)
 ASSIGN_FETCH_FUNC(reg, ftype),                         \
 ASSIGN_FETCH_FUNC(stack, ftype),                       \
 ASSIGN_FETCH_FUNC(retval, ftype),                      \
+ASSIGN_FETCH_FUNC(comm, ftype),                                \
 ASSIGN_FETCH_FUNC(memory, ftype),                      \
 ASSIGN_FETCH_FUNC(symbol, ftype),                      \
 ASSIGN_FETCH_FUNC(deref, ftype),                       \
diff --git a/samples/Kconfig b/samples/Kconfig
index 559a58baff6e..27a24571e96c 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -11,6 +11,13 @@ config SAMPLE_TRACE_EVENTS
        help
          This build trace event example modules.
 
+config SAMPLE_TRACE_PRINTK
+        tristate "Build trace_printk module - tests various trace_printk formats"
+       depends on EVENT_TRACING && m
+       help
+        This builds a module that calls trace_printk() and can be used to
+        test various trace_printk() calls from a module.
+
 config SAMPLE_KOBJECT
        tristate "Build kobject examples -- loadable modules only"
        depends on m
diff --git a/samples/Makefile b/samples/Makefile
index 2e3b523d7097..1a20169d85ac 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -2,4 +2,4 @@
 
 obj-$(CONFIG_SAMPLES)  += kobject/ kprobes/ trace_events/ livepatch/ \
                           hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
-                          configfs/ connector/ v4l/
+                          configfs/ connector/ v4l/ trace_printk/
diff --git a/samples/trace_printk/Makefile b/samples/trace_printk/Makefile
new file mode 100644
index 000000000000..19900ab2b00d
--- /dev/null
+++ b/samples/trace_printk/Makefile
@@ -0,0 +1,6 @@
+# builds a module that calls various trace_printk routines
+# then to use one (as root):  insmod <module_name.ko>
+
+# This module can also be used to test the trace_printk code.
+
+obj-$(CONFIG_SAMPLE_TRACE_PRINTK) += trace-printk.o
diff --git a/samples/trace_printk/trace-printk.c b/samples/trace_printk/trace-printk.c
new file mode 100644
index 000000000000..e9e0040ff7be
--- /dev/null
+++ b/samples/trace_printk/trace-printk.c
@@ -0,0 +1,56 @@
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/irq_work.h>
+
+/* Must not be static to force gcc to consider these non constant */
+char *trace_printk_test_global_str =
+       "This is a dynamic string that will use trace_puts\n";
+
+char *trace_printk_test_global_str_irq =
+       "(irq) This is a dynamic string that will use trace_puts\n";
+
+char *trace_printk_test_global_str_fmt =
+       "%sThis is a %s that will use trace_printk\n";
+
+static struct irq_work irqwork;
+
+static void trace_printk_irq_work(struct irq_work *work)
+{
+       trace_printk("(irq) This is a static string that will use trace_bputs\n");
+       trace_printk(trace_printk_test_global_str_irq);
+
+       trace_printk("(irq) This is a %s that will use trace_bprintk()\n",
+                    "static string");
+
+       trace_printk(trace_printk_test_global_str_fmt,
+                    "(irq) ", "dynamic string");
+}
+
+static int __init trace_printk_init(void)
+{
+       init_irq_work(&irqwork, trace_printk_irq_work);
+
+       trace_printk("This is a static string that will use trace_bputs\n");
+       trace_printk(trace_printk_test_global_str);
+
+       /* Kick off printing in irq context */
+       irq_work_queue(&irqwork);
+
+       trace_printk("This is a %s that will use trace_bprintk()\n",
+                    "static string");
+
+       trace_printk(trace_printk_test_global_str_fmt, "", "dynamic string");
+
+       return 0;
+}
+
+static void __exit trace_printk_exit(void)
+{
+}
+
+module_init(trace_printk_init);
+module_exit(trace_printk_exit);
+
+MODULE_AUTHOR("Steven Rostedt");
+MODULE_DESCRIPTION("trace-printk");
+MODULE_LICENSE("GPL");

Reply via email to