Concurrent read/write operations on the set_ftrace_{notrace,filter}
files may occasionally trigger the following issue:

[ 2715.745293] BUG: unable to handle page fault for address: 00000003da393970
[ 2715.753736] CPU: 1 UID: 0 PID: 1324 Comm: read Not tainted 6.16.0-next-20250808 #1 PREEMPT(full)
[ 2715.755292] RIP: 0010:ftrace_lookup_ip+0x40/0x70
[ 2715.761114] Call Trace:
[ 2715.761462]  <TASK>
[ 2715.761705]  t_func_next.isra.0+0xaa/0xd0
[ 2715.762049]  t_start+0xa3/0x140
[ 2715.762207]  seq_read_iter+0xe8/0x4a0
[ 2715.762564]  seq_read+0x101/0x140
[ 2715.762769]  vfs_read+0xbd/0x340
[ 2715.763014]  ? preempt_count_add+0x4b/0xa0
[ 2715.763311]  ? do_sys_openat2+0x8c/0xd0
[ 2715.763623]  ksys_read+0x65/0xe0
[ 2715.763797]  do_syscall_64+0x4e/0x1c0
[ 2715.764049]  entry_SYSCALL_64_after_hwframe+0x76/0x7e

The issue can be reproduced with the following script (using the
set_ftrace_notrace file as an example):

  while true; do
    echo __probestub_initcall_level > /sys/kernel/tracing/set_ftrace_notrace &
    cat /sys/kernel/tracing/set_ftrace_notrace &
  done

The root cause is that ftrace_regex_open and ftrace_regex_release
do not properly synchronize concurrent access to notrace_hash.
Consider a race scenario between a reader and a writer:

1. The reader first obtains the value of notrace_hash via
   ftrace_regex_open().
2. The writer then updates notrace_hash via ftrace_regex_release()
   and frees the memory pointed to by the old notrace_hash.
3. Later, the reader accesses the old notrace_hash memory through
   ftrace_hash_empty() and ftrace_lookup_ip(), leading to a UAF.

CPU 1 (read)                                    CPU 2 (write)
ftrace_regex_open
  hash = ops->func_hash->notrace_hash;
  iter->hash = hash;
                                                ftrace_regex_open
                                                ftrace_regex_release
                                                  orig_hash = &iter->ops->func_hash->notrace_hash;
                                                  old_hash = *orig_hash;
                                                  free_ftrace_hash_rcu(old_hash);
t_start
  ftrace_hash_empty(iter->hash)
  t_func_next
    !ftrace_lookup_ip(iter->hash, rec->ip)

Since the reader's hash is always tied to its file descriptor (fd),
the writer cannot directly manage the reader's hash. To fix this,
introduce a refcount for ftrace_hash, initialized to 1 at allocation.
The count is incremented when a reader opens the file, and decremented
when a reader releases the file or when a writer replaces the hash, so
that the ftrace_hash is freed only after its last user has dropped it.

Fixes: c20489dad156 ("ftrace: Assign iter->hash to filter or notrace hashes on seq read")
Signed-off-by: Tengda Wu <[email protected]>
---
 kernel/trace/ftrace.c | 27 ++++++++++++++++++++++++---
 kernel/trace/trace.h  |  2 ++
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index cade13595b08..be4842054254 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1060,6 +1060,7 @@ struct ftrace_func_probe {
 static const struct hlist_head empty_buckets[1];
 static const struct ftrace_hash empty_hash = {
        .buckets = (struct hlist_head *)empty_buckets,
+       .refcount = REFCOUNT_INIT(1),
 };
 #define EMPTY_HASH     ((struct ftrace_hash *)&empty_hash)
 
@@ -1282,6 +1283,22 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
        call_rcu(&hash->rcu, __free_ftrace_hash_rcu);
 }
 
+static void get_ftrace_hash(struct ftrace_hash *hash)
+{
+       if (!hash || hash == EMPTY_HASH)
+               return;
+       if (!refcount_inc_not_zero(&hash->refcount))
+               WARN_ON(1);
+}
+
+static void put_ftrace_hash_rcu(struct ftrace_hash *hash)
+{
+       if (!hash || hash == EMPTY_HASH)
+               return;
+       if (refcount_dec_and_test(&hash->refcount))
+               call_rcu(&hash->rcu, __free_ftrace_hash_rcu);
+}
+
 /**
  * ftrace_free_filter - remove all filters for an ftrace_ops
  * @ops: the ops to remove the filters from
@@ -1316,6 +1333,7 @@ static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
        }
 
        hash->size_bits = size_bits;
+       refcount_set(&hash->refcount, 1);
 
        return hash;
 }
@@ -3362,7 +3380,7 @@ static int __ftrace_hash_move_and_update_ops(struct ftrace_ops *ops,
        ret = ftrace_hash_move(ops, enable, orig_hash, hash);
        if (!ret) {
                ftrace_ops_update_code(ops, &old_hash_ops);
-               free_ftrace_hash_rcu(old_hash);
+               put_ftrace_hash_rcu(old_hash);
        }
        return ret;
 }
@@ -3714,7 +3732,7 @@ static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops,
                *orig_subhash = save_hash;
                free_ftrace_hash_rcu(new_hash);
        } else {
-               free_ftrace_hash_rcu(save_hash);
+               put_ftrace_hash_rcu(save_hash);
        }
        return ret;
 }
@@ -4666,8 +4684,10 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
                        trace_parser_put(&iter->parser);
                        goto out_unlock;
                }
-       } else
+       } else {
                iter->hash = hash;
+               get_ftrace_hash(iter->hash);
+       }
 
        ret = 0;
 
@@ -6544,6 +6564,7 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
                mutex_unlock(&ftrace_lock);
        } else {
                /* For read only, the hash is the ops hash */
+               put_ftrace_hash_rcu(iter->hash);
                iter->hash = NULL;
        }
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1dbf1d3cf2f1..4936cd218c36 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -22,6 +22,7 @@
 #include <linux/ctype.h>
 #include <linux/once_lite.h>
 #include <linux/ftrace_regs.h>
+#include <linux/refcount.h>
 
 #include "pid_list.h"
 
@@ -905,6 +906,7 @@ struct ftrace_hash {
        unsigned long           count;
        unsigned long           flags;
        struct rcu_head         rcu;
+       refcount_t              refcount;
 };
 
 struct ftrace_func_entry *
-- 
2.34.1


Reply via email to