It appears as though the addition of the PID namespace did not update
the output code for /proc/*/sched, which resulted in it providing PIDs
that were not self-consistent with the /proc mount. This additionally
made it trivial to detect whether a process was inside &init_pid_ns from
userspace (making container detection trivial[1]). This lead to
situations such as:

  % unshare -pmf
  % mount -t proc proc /proc
  % head -n1 /proc/1/sched
  head (10047, #threads: 1)

Fix this by just using task_pid_nr_ns for the output of /proc/*/sched.
All of the other uses of task_pid_nr in kernel/sched/debug.c are from a
sysctl context and thus don't need to be namespaced.

[1]: https://github.com/jessfraz/amicontained

Cc: <sta...@vger.kernel.org>
Cc: Jess Frazelle <acidb...@google.com>
Signed-off-by: Aleksa Sarai <asa...@suse.com>
---
 fs/proc/base.c              | 3 ++-
 include/linux/sched/debug.h | 4 +++-
 kernel/sched/debug.c        | 5 +++--
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 719c2e943ea1..98fd8f6df851 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1408,12 +1408,13 @@ static const struct file_operations 
proc_fail_nth_operations = {
 static int sched_show(struct seq_file *m, void *v)
 {
        struct inode *inode = m->private;
+       struct pid_namespace *ns = inode->i_sb->s_fs_info;
        struct task_struct *p;
 
        p = get_proc_task(inode);
        if (!p)
                return -ESRCH;
-       proc_sched_show_task(p, m);
+       proc_sched_show_task(p, ns, m);
 
        put_task_struct(p);
 
diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h
index e0eaee54c5a4..5d58d49e9f87 100644
--- a/include/linux/sched/debug.h
+++ b/include/linux/sched/debug.h
@@ -6,6 +6,7 @@
  */
 
 struct task_struct;
+struct pid_namespace;
 
 extern void dump_cpu_task(int cpu);
 
@@ -34,7 +35,8 @@ extern void sched_show_task(struct task_struct *p);
 
 #ifdef CONFIG_SCHED_DEBUG
 struct seq_file;
-extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
+extern void proc_sched_show_task(struct task_struct *p,
+                                struct pid_namespace *ns, struct seq_file *m);
 extern void proc_sched_set_task(struct task_struct *p);
 #endif
 
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 4fa66de52bd6..ac345115877b 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -872,11 +872,12 @@ static void sched_show_numa(struct task_struct *p, struct 
seq_file *m)
 #endif
 }
 
-void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
+                                                 struct seq_file *m)
 {
        unsigned long nr_switches;
 
-       SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr(p),
+       SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns),
                                                get_nr_threads(p));
        SEQ_printf(m,
                "---------------------------------------------------------"
-- 
2.13.3

Reply via email to