To test out the new utrace interface (I was using kernel 2.6.26-138.fc10.x86_64), I ported the crash-suspend.c example from the old interface to the new interface. It is attached to this email. Feel free to ignore/delete the extra debug printk's.
When run, it does what it is supposed to do, but triggers a BUG_ON message when the crashed/suspended process is put back in the foreground. Note that I don't have any real feel for whether the bug lies in my crash-suspend.c translation or the new utrace itself. The BUG details are in msg.txt. -- David Smith [EMAIL PROTECTED] Red Hat http://www.redhat.com 256.217.0141 (direct) 256.837.0057 (fax)
#include <linux/sched.h> #include <linux/pid.h> #include <linux/utrace.h> #include <linux/err.h> #include <linux/module.h> #include <linux/errno.h> MODULE_DESCRIPTION("automatic suspend on crash"); MODULE_LICENSE("GPL"); static int target_pid; static int verbose; module_param_named(pid, target_pid, int, 0); module_param(verbose, bool, 0); #define MY_EVENTS (UTRACE_EVENT(CLONE) | UTRACE_EVENT(DEATH) \ | UTRACE_EVENT(SIGNAL_CORE) | UTRACE_EVENT(JCTL)) /* * This is the interesting hook. */ static u32 crash_suspend_signal(u32 action, struct utrace_attached_engine *engine, struct task_struct *tsk, struct pt_regs *regs, siginfo_t *info, const struct k_sigaction *orig_ka, struct k_sigaction *return_ka) { printk("%s:%d action = 0x%x\n", __FUNCTION__, __LINE__, action); if (info->si_errno & 0x80000000) { info->si_errno &= ~0x80000000; printk("%s:%d\n", __FUNCTION__, __LINE__); return UTRACE_RESUME; } /* * If another engine is doing something, just get out of the way. */ if ((action & UTRACE_SIGNAL_MASK) != UTRACE_SIGNAL_CORE) return UTRACE_RESUME; printk("%s:%d\n", __FUNCTION__, __LINE__); info->si_errno |= 0x80000000; return UTRACE_SIGNAL_TSTP | UTRACE_SIGNAL_HOLD; } static u32 crash_suspend_jctl(enum utrace_resume_action action, struct utrace_attached_engine *engine, struct task_struct *tsk, bool notify, int type) { printk("%s:%d type = 0x%x\n", __FUNCTION__, __LINE__, type); if (type == CLD_STOPPED) { int signr = tsk->exit_code; pid_t pgid = task_pgrp_nr(tsk); if (verbose) printk("crash-suspend stopped" " pgrp %d for pid %d signal %d\n", pgid, tsk->pid, signr); if (signr != SIGSTOP && signr != SIGTSTP && signr != SIGTTOU && signr != SIGTTIN) /* * This is an unnatural stop induced by us, above. * Now that we have ourselves stopped with the * proper weirdo status, stop the rest of the * process group too in normal job control fashion. */ (void) kill_pgrp(find_pid(-pgid), SIGTTOU, 1); } return UTRACE_RESUME; } /* * On clone, attach to the child. 
*/ static u32 crash_suspend_clone(enum utrace_resume_action action, struct utrace_attached_engine *engine, struct task_struct *parent, unsigned long clone_flags, struct task_struct *child) { struct utrace_attached_engine *child_engine; printk("%s:%d\n", __FUNCTION__, __LINE__); child_engine = utrace_attach(child, UTRACE_ATTACH_CREATE, engine->ops, 0); if (IS_ERR(child_engine)) { printk("attach to clone child %d (%lx) from 0x%p => %ld\n", child->pid, clone_flags, engine, PTR_ERR(child_engine)); } else utrace_set_events(child, child_engine, MY_EVENTS); return UTRACE_RESUME; } /* * If we are still attached at task death, it didn't die by core dump signal. * Just detach and let it go. */ static u32 crash_suspend_death(struct utrace_attached_engine *engine, struct task_struct *tsk, bool group_dead, int signal) { printk("%s:%d\n", __FUNCTION__, __LINE__); return UTRACE_DETACH; } static const struct utrace_engine_ops crash_suspend_ops = { .report_clone = crash_suspend_clone, .report_death = crash_suspend_death, .report_signal = crash_suspend_signal, .report_jctl = crash_suspend_jctl, }; static int __init init_crash_suspend(void) { struct task_struct *target; struct utrace_attached_engine *engine; rcu_read_lock(); target = find_task_by_pid(target_pid); if (target) get_task_struct(target); rcu_read_unlock(); if (target == NULL) { printk("cannot find PID %d\n", target_pid); return -ESRCH; } engine = utrace_attach(target, UTRACE_ATTACH_CREATE, &crash_suspend_ops, 0); if (IS_ERR(engine)) printk("utrace_attach: %ld\n", PTR_ERR(engine)); else if (engine == NULL) printk("utrace_attach => null!\n"); else printk("attached to %d => 0x%p\n", target->pid, engine); utrace_set_events(target, engine, MY_EVENTS); WARN_ON(atomic_dec_and_test(&target->usage)); return 0; } static void __exit exit_crash_suspend(void) { struct task_struct *t; struct utrace_attached_engine *engine; int n = 0; rcu_read_lock(); for_each_process(t) { engine = utrace_attach(t, UTRACE_ATTACH_MATCH_OPS, 
&crash_suspend_ops, 0); if (IS_ERR(engine)) { int error = -PTR_ERR(engine); if (error != ENOENT) printk("!!! utrace_attach returned %d on %d\n", error, t->pid); } else { utrace_control(t, engine, UTRACE_DETACH); ++n; } } rcu_read_unlock(); printk("detached from %d threads, unloading\n", n); } module_init(init_crash_suspend); module_exit(exit_crash_suspend);
Jul 23 14:52:16 dhcp-148 kernel:<4>attached to 3132 => 0xffff81000a46d000 Jul 23 14:52:28 dhcp-148 kernel:<4>crash_suspend_clone:90 Jul 23 14:52:31 dhcp-148 kernel:<4>crash_suspend_signal:32 action = 0x35 Jul 23 14:52:31 dhcp-148 kernel:<4>crash_suspend_signal:45 Jul 23 14:52:31 dhcp-148 kernel:<4>crash_suspend_jctl:56 type = 0x40005 Jul 23 14:52:31 dhcp-148 kernel:<4>crash-suspend stopped pgrp 4327 for pid 4327 signal 3 Jul 23 14:52:34 dhcp-148 kernel:<0>------------[ cut here ]------------ Jul 23 14:52:34 dhcp-148 kernel:<2>kernel BUG at kernel/utrace.c:1188! Jul 23 14:52:34 dhcp-148 kernel:<0>invalid opcode: 0000 [4] SMP DEBUG_PAGEALLOC Jul 23 14:52:34 dhcp-148 kernel:<4>CPU 0 Jul 23 14:52:34 dhcp-148 kernel:<4>Modules linked in: crash_suspend nfs lockd nfs_acl rfcomm l2cap bluetooth autofs4 fuse sunrpc ipt_REJECT nf_conntrack_ipv4 iptable_filter ip_tables ip6t_REJECT xt_tcpudp nf_conntrack_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables x_tables ipv6 loop floppy pcspkr 8139too 8139cp mii i2c_piix4 i2c_core sr_mod cdrom dm_snapshot dm_zero dm_mirror dm_log dm_mod pata_acpi ata_piix ata_generic ext3 jbd mbcache uhci_hcd ohci_hcd ehci_hcd [last unloaded: crash_suspend] Jul 23 14:52:34 dhcp-148 kernel:<4>Pid: 4327, comm: cat Tainted: G D 2.6.26-138.fc10.x86_64 #1 Jul 23 14:52:34 dhcp-148 kernel:<4>RIP: 0010:[<ffffffff81072a54>] [<ffffffff81072a54>] start_report+0x11/0x38 Jul 23 14:52:34 dhcp-148 kernel:<4>RSP: 0018:ffff81000a4afd18 EFLAGS: 00010202 Jul 23 14:52:34 dhcp-148 kernel:<4>RAX: 0000000000000063 RBX: 0000000000040006 RCX: 0000000000004601 Jul 23 14:52:34 dhcp-148 kernel:<4>RDX: 0000000000004600 RSI: 0000000000040006 RDI: ffff81001950d1f8 Jul 23 14:52:34 dhcp-148 kernel:<4>RBP: ffff81000a4afd28 R08: 0000000000000002 R09: 0000000000000001 Jul 23 14:52:34 dhcp-148 kernel:<4>R10: ffff81001d993e08 R11: ffff810010818048 R12: ffff81001950d1f8 Jul 23 14:52:34 dhcp-148 kernel:<4>R13: ffff810010818000 R14: ffff81001d950000 R15: ffff81000a4afd58 Jul 23 14:52:34 
dhcp-148 kernel:<4>FS: 00007f627e0446f0(0000) GS:ffffffff81492000(0000) knlGS:0000000000000000 Jul 23 14:52:34 dhcp-148 kernel:<4>CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b Jul 23 14:52:34 dhcp-148 kernel:<4>CR2: 00000000006bd4b4 CR3: 000000001e0f0000 CR4: 00000000000006e0 Jul 23 14:52:34 dhcp-148 kernel:<4>DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 Jul 23 14:52:34 dhcp-148 kernel:<4>DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Jul 23 14:52:34 dhcp-148 kernel:<4>Process cat (pid: 4327, threadinfo ffff81000a4ae000, task ffff810010818000) Jul 23 14:52:34 dhcp-148 kernel:<4>Stack: 0000000000040006 ffff81001950d1f8 ffff81000a4afd98 ffffffff810730d9 Jul 23 14:52:34 dhcp-148 kernel:<4> ffffffff810461c9 0000000100040006 ffff81001d993e20 0000000000000046 Jul 23 14:52:34 dhcp-148 kernel:<4> 0000000000000005 ffff81000a000000 ffff81001d993e08 0000000000040006 Jul 23 14:52:34 dhcp-148 kernel:<4>Call Trace: Jul 23 14:52:34 dhcp-148 kernel:<4> [<ffffffff810730d9>] utrace_report_jctl+0xeb/0x1f1 Jul 23 14:52:34 dhcp-148 kernel:<4> [<ffffffff810461c9>] ? get_signal_to_deliver+0x61/0x2c3 Jul 23 14:52:34 dhcp-148 kernel:<4> [<ffffffff81046215>] get_signal_to_deliver+0xad/0x2c3 Jul 23 14:52:34 dhcp-148 kernel:<4> [<ffffffff8100b377>] do_notify_resume+0x90/0x90a Jul 23 14:52:34 dhcp-148 kernel:<4> [<ffffffff8107c09c>] ? 
audit_syscall_exit+0x331/0x353 Jul 23 14:52:34 dhcp-148 kernel:<4> [<ffffffff8100c402>] int_signal+0x12/0x17 Jul 23 14:52:34 dhcp-148 kernel:<4> Jul 23 14:52:34 dhcp-148 kernel:<4> Jul 23 14:52:34 dhcp-148 kernel:<0>Code: 48 85 c0 49 89 45 08 74 04 31 c0 eb 05 b8 f5 ff ff ff 5e 5b 41 5c 41 5d c9 c3 55 48 89 e5 41 54 49 89 fc 53 8a 47 5c a8 01 74 04 <0f> 0b eb fe a8 02 74 1a 48 8d 5f 20 48 89 df e8 a5 d7 26 00 41 Jul 23 14:52:34 dhcp-148 kernel:<1>RIP [<ffffffff81072a54>] start_report+0x11/0x38 Jul 23 14:52:34 dhcp-148 kernel:<4> RSP <ffff81000a4afd18> Jul 23 14:52:34 dhcp-148 kernel:<4>---[ end trace 04166b488c6df96e ]--- Jul 23 14:52:34 dhcp-148 kernel:<4>crash_suspend_death:112 Jul 23 15:05:15 dhcp-148 kernel:<4>crash_suspend_clone:90 Jul 23 15:05:15 dhcp-148 kernel:<4>crash_suspend_death:112 Jul 23 16:06:57 dhcp-148 kernel:<4>crash_suspend_clone:90 Jul 23 16:06:57 dhcp-148 kernel:<4>crash_suspend_death:112 Jul 23 16:07:04 dhcp-148 kernel:<4>crash_suspend_clone:90 Jul 23 16:07:04 dhcp-148 kernel:<4>detached from 2 threads, unloading