After do_exit() -> exit_notify(), the exiting process's pid and tgid may already be detached (via free_pid()) before proc_exit_connector() is called, so proc_exit_connector() fails to report the proper pid/tgid via netlink.
Ordinary "good" scenario: parent 37311 [000] 22305.315986: probe:alloc_pid_L85: child_process_pid=38210 ffffffffa4f32888 alloc_pid+0x138 ffffffffa4f07850 copy_process+0xc20 ffffffffa4f084cb kernel_clone+0x9b ffffffffa4f08980 __do_sys_clone+0x60 ffffffffa5a9e45c do_syscall_64+0x5c ffffffffa5c0012f entry_SYSCALL_64+0xaf 1189d7 _Fork+0x27 (/usr/lib64/libc.so.6) // proc_event_connector_ve() is executed BEFORE parent gets SIGCHLD child 38210 [000] 22305.316156: probe:fill_exit_event_L7: ffffffffa5616568 fill_exit_event+0x38 (/usr/lib/debug/lib/modules/5.14.0-427.55.1.vz9.82.9/vmlinux) ffffffffa5616a51 proc_event_connector_ve+0xa1 (/usr/lib/debug/lib/modules/5.14.0-427.55.1.vz9.82.9/vmlinux) ffffffffa4f0fe7f do_exit+0x2cf (/usr/lib/debug/lib/modules/5.14.0-427.55.1.vz9.82.9/vmlinux) ffffffffa4f1018d do_group_exit+0x2d (/usr/lib/debug/lib/modules/5.14.0-427.55.1.vz9.82.9/vmlinux) // parents gets SIGCHLD AFTER data for netlink listeners is filled, child pid/tgid is detached parent 37311 [001] 22305.316162: probe:free_pid_L9: child_process_pid=38210 ffffffffa4f32637 free_pid+0x57 ffffffffa4f0db11 __exit_signal+0x2d1 ffffffffa4f0df25 release_task+0xc5 ffffffffa4f0e73a wait_task_zombie+0x14a ffffffffa4f0f00a do_wait+0x16a ffffffffa4f10336 kernel_wait4+0xa6 Possible "bad" scenario: parent 37311 [001] 22305.316371: probe:alloc_pid_L85: child_process_pid=38211 ffffffffa4f32888 alloc_pid+0x138 ffffffffa4f07850 copy_process+0xc20 ffffffffa4f084cb kernel_clone+0x9b ffffffffa4f08980 __do_sys_clone+0x60 ffffffffa5a9e45c do_syscall_64+0x5c ffffffffa5c0012f entry_SYSCALL_64+0xaf 1189d7 _Fork+0x27 (/usr/lib64/libc.so.6) // parents gets SIGCHLD BEFORE data for netlink listeners is filled, child pid/tgid is detached parent 37311 [000] 22305.316552: probe:free_pid_L9: child_process_pid=38211 ffffffffa4f32637 free_pid+0x57 ffffffffa4f0db11 __exit_signal+0x2d1 ffffffffa4f0df25 release_task+0xc5 ffffffffa4f0e73a wait_task_zombie+0x14a ffffffffa4f0f00a do_wait+0x16a ffffffffa4f10336 
kernel_wait4+0xa6 // proc_event_connector_ve() is executed AFTER parent gets SIGCHLD, // cannot find pid/tgid in idr anymore :-1 -1 [001] 22305.316560: probe:fill_exit_event_L7: (ffffffffa5616567) pid_ns=0xffffffffa6a5efa0 process_pid=0 ffffffffa5616568 fill_exit_event+0x38 ffffffffa5616a51 proc_event_connector_ve+0xa1 ffffffffa4f0fe7f do_exit+0x2cf ffffffffa4f1018d do_group_exit+0x2d The issue is caught by LTP suite_connectors.exec.cn_pec_sh test, so the reproducer is: // install/build LTP cd /opt/ltp/testcases/bin/ export PATH=$PATH:/opt/ltp/testcases/bin for i in $(seq 1 1024); do echo -n "$i: "; TST_TIMEOUT=100 cn_pec.sh 2>&1 | \ tee run.log | egrep "failed.*0" || break; done; cat run.log Solve this by saving process pid/tgid before exit_notify() is called and use them in proc_exit_connector(). https://virtuozzo.atlassian.net/browse/PSBM-161075 Fixes: 858c48a22fb2 ("proc connector: add pid namespace awareness") Signed-off-by: Konstantin Khorenko <khore...@virtuozzo.com> Feature: cgroup: control group rules daemon (cgrulesengd) virtualization --- drivers/connector/cn_proc.c | 11 ++++++----- include/linux/cn_proc.h | 13 +++++++++++-- kernel/exit.c | 7 ++++++- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 3349b6336e9f..2d1bad855357 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -292,13 +292,14 @@ void proc_coredump_connector(struct task_struct *task) } static bool fill_exit_event(struct proc_event *ev, struct ve_struct *ve, - struct task_struct *task, long unused) + struct task_struct *task, long cookie_pids) { struct pid_namespace *pid_ns = ve->ve_ns->pid_ns_for_children; struct task_struct *parent; + struct pids *pids = (struct pids *)cookie_pids; - ev->event_data.exit.process_pid = task_pid_nr_ns(task, pid_ns); - ev->event_data.exit.process_tgid = task_tgid_nr_ns(task, pid_ns); + ev->event_data.exit.process_pid = pid_nr_ns(pids->pid, pid_ns); + 
ev->event_data.exit.process_tgid = pid_nr_ns(pids->tgid, pid_ns); ev->event_data.exit.exit_code = task->exit_code; ev->event_data.exit.exit_signal = task->exit_signal; @@ -314,9 +315,9 @@ static bool fill_exit_event(struct proc_event *ev, struct ve_struct *ve, return true; } -void proc_exit_connector(struct task_struct *task) +void proc_exit_connector(struct task_struct *task, struct pids *pids) { - proc_event_connector(task, PROC_EVENT_EXIT, 0, fill_exit_event); + proc_event_connector(task, PROC_EVENT_EXIT, (long)pids, fill_exit_event); } /* diff --git a/include/linux/cn_proc.h b/include/linux/cn_proc.h index 1d5b02a96c46..9701c13d82df 100644 --- a/include/linux/cn_proc.h +++ b/include/linux/cn_proc.h @@ -19,6 +19,15 @@ #include <uapi/linux/cn_proc.h> +/* + * The struct is used solely for pinning task pids for proc connector + * notification on process exit. + */ +struct pids { + struct pid *pid; + struct pid *tgid; +}; + #ifdef CONFIG_PROC_EVENTS void proc_fork_connector(struct task_struct *task); void proc_exec_connector(struct task_struct *task); @@ -27,7 +36,7 @@ void proc_sid_connector(struct task_struct *task); void proc_ptrace_connector(struct task_struct *task, int which_id); void proc_comm_connector(struct task_struct *task); void proc_coredump_connector(struct task_struct *task); -void proc_exit_connector(struct task_struct *task); +void proc_exit_connector(struct task_struct *task, struct pids *pids); #else static inline void proc_fork_connector(struct task_struct *task) {} @@ -52,7 +61,7 @@ static inline void proc_ptrace_connector(struct task_struct *task, static inline void proc_coredump_connector(struct task_struct *task) {} -static inline void proc_exit_connector(struct task_struct *task) +static inline void proc_exit_connector(struct task_struct *task, struct pids *pids) {} #endif /* CONFIG_PROC_EVENTS */ #endif /* CN_PROC_H */ diff --git a/kernel/exit.c b/kernel/exit.c index 7273cecde8ce..717013bd378e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c 
@@ -788,6 +788,7 @@ void __noreturn do_exit(long code) { struct task_struct *tsk = current; int group_dead; + struct pids pids; WARN_ON(tsk->plug); @@ -868,8 +869,12 @@ void __noreturn do_exit(long code) flush_ptrace_hw_breakpoint(tsk); exit_tasks_rcu_start(); + pids.pid = get_pid(task_pid(tsk)); + pids.tgid = get_pid(task_tgid(tsk)); exit_notify(tsk, group_dead); - proc_exit_connector(tsk); + proc_exit_connector(tsk, &pids); + put_pid(pids.tgid); + put_pid(pids.pid); mpol_put_task_policy(tsk); #ifdef CONFIG_FUTEX if (unlikely(current->pi_state_cache)) -- 2.43.5 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel