Public bug reported: Scheduler deadlocks have been observed on c1.xlarge EC2 instances running Ubuntu 10.04.3 LTS with the 3.0.0-20-virtual Oneiric backport kernel. The symptoms appear similar to bug 929941, where multiple CPUs are waiting on scheduler runqueue locks; in this case, however, only a few CPUs are stuck.
A typical set of stack traces from the guest state looks like: VCPU0 rip: ffffffff810013aa hypercall_page+0x3aa flags: 00001202 i nz rsp: ffff8801b3c27910 rax: 0000000000000000 rcx: ffffffff810013aa rdx: ffff8801b3c27954 rbx: ffff88000265cb30 rsi: ffff8801b3c27938 rdi: 0000000000000003 rbp: ffff8801b3c27958 r8: 0000000000000001 r9: 0000000000000001 r10: 0000000000000000 r11: 0000000000000202 r12: 0000000000000011 r13: 0000000000000001 r14: 0000000000000001 r15: 0000000000000000 cs: e033 ss: e02b ds: 0000 es: 0000 fs: 0000 @ 00007f4ce223f700 gs: 0000 @ ffff8801bfed4000/0000000000000000 cr0: 80050033 cr2: 0061ade0 cr3: 0e93d000 cr4: 00002660 dr0: 00000000 dr1: 00000000 dr2: 00000000 dr3: 00000000 dr6: ffff0ff0 dr7: 00000400 Code (instr addr ffffffff810013aa) cc cc cc cc cc cc cc cc cc cc cc 51 41 53 b8 1d 00 00 00 0f 05 <41> 5b 59 c3 cc cc cc cc cc cc cc Stack: 0000000000000246 0000000000000000 ffffffff81394b42 ffff8801b3c27938 0000000000000000 ffff8801b3c27954 ffffffff00000001 0000000000000000 0000000481394ad6 ffff8801b3c27968 ffffffff81394b60 ffff8801b3c279b8 ffffffff8100933f ffff8801b3c27a48 0000000000000000 ffff8801b3c27998 Call Trace: [<ffffffff810013aa>] hypercall_page+0x3aa <-- [<ffffffff81394b42>] xen_poll_irq_timeout+0x42 [<ffffffff81394b60>] xen_poll_irq+0x10 [<ffffffff8100933f>] xen_spin_lock_slow+0x7f [<ffffffff81009435>] xen_spin_lock_flags+0x75 [<ffffffff8160365f>] _raw_spin_lock_irqsave+0x2f [<ffffffff8104ee50>] task_rq_lock+0x40 [<ffffffff8104f069>] task_sched_runtime+0x29 [<ffffffff81085f38>] thread_group_cputime+0x88 [<ffffffff812c4dc9>] apparmor_ptrace_access_check+0x39 [<ffffffff81051593>] thread_group_times+0x33 [<ffffffff811d4192>] do_task_stat+0x6d2 [<ffffffff8160361e>] _raw_spin_lock+0xe [<ffffffff8119165f>] seq_open+0x4f [<ffffffff811d0380>] sched_autogroup_show+0x70 [<ffffffff811d0380>] sched_autogroup_show+0x70 [<ffffffff8119173a>] single_open+0x7a [<ffffffff811cdbf0>] sched_open+0x20 [<ffffffff811cdc0b>] proc_single_open+0x1b 
[<ffffffff8118e460>] mntput_no_expire+0x60 [<ffffffff8118e5ad>] mntput+0x1d [<ffffffff811d4624>] proc_tgid_stat+0x14 [<ffffffff811d03e1>] proc_single_show+0x61 [<ffffffff81191be2>] seq_read+0xf2 [<ffffffff8116fe35>] vfs_read+0xc5 [<ffffffff81170001>] sys_read+0x51 [<ffffffff8160ba02>] system_call_fastpath+0x16 VCPU1 rip: ffffffff8105a777 try_to_wake_up+0xd7 flags: 00001202 i nz rsp: ffff8801bfef28f0 rax: 0000000000000003 rcx: 0000000000000000 rdx: 0000000000000001 rbx: 0000000000012980 rsi: ffff8801b1990078 rdi: 0000000000000000 rbp: ffff8801bfef2950 r8: 0000000000000000 r9: 0000000000000000 r10: 0000000000000000 r11: 00000000fb981853 r12: ffff88000265c530 r13: 0000000000000000 r14: ffff88000265cb30 r15: 0000000000000000 cs: e033 ss: e02b ds: 0000 es: 0000 fs: 0000 @ 00007ff68926c700 gs: 0000 @ ffff8801bfeef000/0000000000000000 cr0: 8005003b cr2: 00441d80 cr3: 1174cb000 cr4: 00002660 dr0: 00000000 dr1: 00000000 dr2: 00000000 dr3: 00000000 dr6: ffff0ff0 dr7: 00000400 Code (instr addr ffffffff8105a777) 00 00 eb 0c 66 2e 0f 1f 84 00 00 00 00 00 f3 90 41 8b 54 24 28 <85> d2 75 f5 49 8b 14 24 31 c0 83 Stack: 0000000000000000 ffff8801b41d2858 ffff8801bfef2950 ffffffff8153e51e 0000000300000004 ffff8801b1990078 ffff8801bfef2930 ffff8800026f6c18 0000000000000001 ffff8800026f6c30 0000000000000000 0000000000000000 ffff8801bfef2960 ffffffff8105a962 ffff8801bfef29b0 ffffffff81049709 Call Trace: [<ffffffff8105a777>] try_to_wake_up+0xd7 <-- [<ffffffff8153e51e>] ip_finish_output+0x16e [<ffffffff8105a962>] default_wake_function+0x12 [<ffffffff81049709>] __wake_up_common+0x59 [<ffffffff81049758>] __wake_up_locked+0x18 [<ffffffff811afcf4>] ep_poll_callback+0xa4 [<ffffffff81049709>] __wake_up_common+0x59 [<ffffffff8104ed53>] __wake_up_sync_key+0x53 [<ffffffff814f3d2e>] sock_def_readable+0x3e [<ffffffff8155255a>] tcp_rcv_established+0x26a [<ffffffff8100742d>] xen_force_evtchn_callback+0xd [<ffffffff81007b72>] check_events+0x12 [<ffffffff8155a9a5>] tcp_v4_do_rcv+0x125 
[<ffffffff8155c169>] tcp_v4_rcv+0x5a9 [<ffffffff8153854d>] ip_local_deliver_finish+0xdd [<ffffffff81538790>] ip_local_deliver+0x80 [<ffffffff81537db9>] ip_rcv_finish+0x119 [<ffffffff815383a8>] ip_rcv+0x228 [<ffffffff815d0afd>] packet_rcv_spkt+0x4d [<ffffffff815036b0>] __netif_receive_skb+0x1e0 [<ffffffff81506790>] netif_receive_skb+0x80 [<ffffffff814614f4>] handle_incoming_queue+0x134 [<ffffffff81461fd7>] xennet_poll+0x277 [<ffffffff81506ff8>] net_rx_action+0x108 [<ffffffff8160361e>] _raw_spin_lock+0xe [<ffffffff8106889f>] __do_softirq+0xbf [<ffffffff810d3f7d>] handle_edge_irq+0x9d [<ffffffff8160cc1c>] call_softirq+0x1c [<ffffffff8100d3d5>] do_softirq+0x65 [<ffffffff8106869d>] irq_exit+0xbd [<ffffffff81394ee5>] xen_evtchn_do_upcall+0x35 [<ffffffff8160cc6e>] xen_do_hypervisor_callback+0x1e VCPU2 rip: ffffffff810013aa hypercall_page+0x3aa flags: 00001202 i nz rsp: ffff8801bff0da00 rax: 0000000000000000 rcx: ffffffff810013aa rdx: ffff8801bff0da44 rbx: ffff8800026f6c00 rsi: ffff8801bff0da28 rdi: 0000000000000003 rbp: ffff8801bff0da48 r8: 00000000000000c3 r9: 000000000000c110 r10: 0000000000100000 r11: 0000000000000202 r12: 000000000000001d r13: 0000000000000001 r14: 0000000000000001 r15: 0000000000000000 cs: e033 ss: e02b ds: 0000 es: 0000 fs: 0000 @ 00007f408128b700 gs: 0000 @ ffff8801bff0a000/0000000000000000 cr0: 80050033 cr2: 0061ade0 cr3: 289f2000 cr4: 00002660 dr0: 00000000 dr1: 00000000 dr2: 00000000 dr3: 00000000 dr6: ffff0ff0 dr7: 00000400 Code (instr addr ffffffff810013aa) cc cc cc cc cc cc cc cc cc cc cc 51 41 53 b8 1d 00 00 00 0f 05 <41> 5b 59 c3 cc cc cc cc cc cc cc Stack: 0000000000112000 0000000000000000 ffffffff81394b42 ffff8801bff0da28 0000000000000000 ffff8801bff0da44 ffffffff00000001 0000000000000000 0000001081394ad6 ffff8801bff0da58 ffffffff81394b60 ffff8801bff0daa8 ffffffff8100933f ffff8801bff0da88 0000000000000000 ffff8801b3d14530 Call Trace: [<ffffffff810013aa>] hypercall_page+0x3aa <-- [<ffffffff81394b42>] xen_poll_irq_timeout+0x42 
[<ffffffff81394b60>] xen_poll_irq+0x10 [<ffffffff8100933f>] xen_spin_lock_slow+0x7f [<ffffffff81009435>] xen_spin_lock_flags+0x75 [<ffffffff8160365f>] _raw_spin_lock_irqsave+0x2f [<ffffffff811afca9>] ep_poll_callback+0x59 [<ffffffff816036ae>] _raw_spin_unlock_irqrestore+0x1e [<ffffffff81049709>] __wake_up_common+0x59 [<ffffffff8104ed53>] __wake_up_sync_key+0x53 [<ffffffff814f3d2e>] sock_def_readable+0x3e [<ffffffff8154ef50>] tcp_data_queue+0x300 [<ffffffff81552639>] tcp_rcv_established+0x349 [<ffffffff8155a9a5>] tcp_v4_do_rcv+0x125 [<ffffffff8155c169>] tcp_v4_rcv+0x5a9 [<ffffffff81054e18>] enqueue_sleeper+0x188 [<ffffffff8153854d>] ip_local_deliver_finish+0xdd [<ffffffff81538790>] ip_local_deliver+0x80 [<ffffffff81537db9>] ip_rcv_finish+0x119 [<ffffffff815383a8>] ip_rcv+0x228 [<ffffffff815036b0>] __netif_receive_skb+0x1e0 [<ffffffff81007b5f>] xen_restore_fl_direct_reloc+0x4 [<ffffffff810d7af4>] rcu_enter_nohz+0x44 [<ffffffff81503aeb>] process_backlog+0x10b [<ffffffff810d10b5>] handle_irq_event_percpu+0xb5 [<ffffffff81506ff8>] net_rx_action+0x108 [<ffffffff8100742d>] xen_force_evtchn_callback+0xd [<ffffffff8106889f>] __do_softirq+0xbf [<ffffffff81007b5f>] xen_restore_fl_direct_reloc+0x4 [<ffffffff8160cc1c>] call_softirq+0x1c VCPU3 rip: ffffffff810013aa hypercall_page+0x3aa flags: 00001202 i nz rsp: ffff8801b1993b10 rax: 0000000000000000 rcx: ffffffff810013aa rdx: ffff8801b1993b54 rbx: ffff8801bff01980 rsi: ffff8801b1993b38 rdi: 0000000000000003 rbp: ffff8801b1993b58 r8: ffff8801bf004760 r9: 0000000000000040 r10: 000000000000001d r11: 0000000000000202 r12: 0000000000000023 r13: 0000000000000001 r14: 0000000000000001 r15: 0000000000000000 cs: e033 ss: e02b ds: 0000 es: 0000 fs: 0000 @ 00007f1a15c06700 gs: 0000 @ ffff8801bff25000/0000000000000000 cr0: 80050033 cr2: 7f78b037de20 cr3: 0e9c9000 cr4: 00002660 dr0: 00000000 dr1: 00000000 dr2: 00000000 dr3: 00000000 dr6: ffff0ff0 dr7: 00000400 Code (instr addr ffffffff810013aa) cc cc cc cc cc cc cc cc cc cc cc 51 41 53 b8 1d 
00 00 00 0f 05 <41> 5b 59 c3 cc cc cc cc cc cc cc Stack: 0000000000000001 0000000000000000 ffffffff81394b42 ffff8801b1993b38 0000000000000000 ffff8801b1993b54 ffffffff00000001 0000000000000000 0000001681394ad6 ffff8801b1993b68 ffffffff81394b60 ffff8801b1993bb8 ffffffff8100933f 0000000000000000 0000000000000000 0000000000000000 Call Trace: [<ffffffff810013aa>] hypercall_page+0x3aa <-- [<ffffffff81394b42>] xen_poll_irq_timeout+0x42 [<ffffffff81394b60>] xen_poll_irq+0x10 [<ffffffff8100933f>] xen_spin_lock_slow+0x7f [<ffffffff81009496>] xen_spin_lock+0x56 [<ffffffff8160361e>] _raw_spin_lock+0xe [<ffffffff8104d90d>] double_rq_lock+0x2d [<ffffffff81059f15>] load_balance+0x1f5 [<ffffffff81601576>] __schedule+0x7e6 [<ffffffff8160165f>] schedule+0x3f [<ffffffff816025b5>] schedule_hrtimeout_range_clock+0xc5 [<ffffffff810881c0>] update_rmtp+0x80 [<ffffffff81089524>] hrtimer_start_range_ns+0x14 [<ffffffff81602663>] schedule_hrtimeout_range+0x13 [<ffffffff811af9f8>] ep_poll+0x2d8 [<ffffffff8105a950>] try_to_wake_up+0x2b0 [<ffffffff81290c2b>] security_file_permission+0x8b [<ffffffff8116fee0>] vfs_read+0x170 [<ffffffff811afb35>] sys_epoll_wait+0xc5 [<ffffffff8160ba02>] system_call_fastpath+0x16 VCPU4 rip: ffffffff810013aa hypercall_page+0x3aa flags: 00001246 i z p rsp: ffff8801b45f5ed8 rax: 0000000000000000 rcx: ffffffff810013aa rdx: 0000000000000000 rbx: ffff8801b45f4010 rsi: 0000000000000000 rdi: 0000000000000001 rbp: ffff8801b45f5ef0 r8: 0000000000000000 r9: 0000000000000000 r10: 0000000000000000 r11: 0000000000000246 r12: 0000000000000004 r13: 0000000000000000 r14: 0000000000000000 r15: 0000000000000000 cs: e033 ss: e02b ds: 002b es: 002b fs: 0000 @ 00007fe085364700 gs: 0000 @ ffff8801bff40000/0000000000000000 cr0: 8005003b cr2: 7f7127c10000 cr3: 29c76000 cr4: 00002660 dr0: 00000000 dr1: 00000000 dr2: 00000000 dr3: 00000000 dr6: ffff0ff0 dr7: 00000400 Code (instr addr ffffffff810013aa) cc cc cc cc cc cc cc cc cc cc cc 51 41 53 b8 1d 00 00 00 0f 05 <41> 5b 59 c3 cc cc cc cc cc 
cc cc Stack: 0000000000000000 0000000000000000 ffffffff81007490 ffff8801b45f5f10 ffffffff81013c3d ffff8801b45f4010 ffffffff81cc7f48 ffff8801b45f5f40 ffffffff8100b0c7 ffffffff81007b19 6811f30c74562efe 0000000000000000 0000000000000000 ffff8801b45f5f50 ffffffff815f523d 0000000000000000 Call Trace: [<ffffffff810013aa>] hypercall_page+0x3aa <-- [<ffffffff81007490>] xen_safe_halt+0x10 [<ffffffff81013c3d>] default_idle+0x5d [<ffffffff8100b0c7>] cpu_idle+0xb7 [<ffffffff81007b19>] xen_irq_enable_direct_reloc+0x4 [<ffffffff815f523d>] cpu_bringup_and_idle+0xe VCPU5 rip: ffffffff810013aa hypercall_page+0x3aa flags: 00001246 i z p rsp: ffff8801b45f7ed8 rax: 0000000000000000 rcx: ffffffff810013aa rdx: 0000000000000000 rbx: ffff8801b45f6010 rsi: 0000000000000000 rdi: 0000000000000001 rbp: ffff8801b45f7ef0 r8: 0000000000000000 r9: 0000000000000000 r10: 0000000000000000 r11: 0000000000000246 r12: 0000000000000005 r13: 0000000000000000 r14: 0000000000000000 r15: 0000000000000000 cs: e033 ss: e02b ds: 002b es: 002b fs: 0000 @ 00007fcf9cf7a700 gs: 0000 @ ffff8801bff5b000/0000000000000000 cr0: 8005003b cr2: 7faf54aac360 cr3: 325e8000 cr4: 00002660 dr0: 00000000 dr1: 00000000 dr2: 00000000 dr3: 00000000 dr6: ffff0ff0 dr7: 00000400 Code (instr addr ffffffff810013aa) cc cc cc cc cc cc cc cc cc cc cc 51 41 53 b8 1d 00 00 00 0f 05 <41> 5b 59 c3 cc cc cc cc cc cc cc Stack: 0000000000000000 0000000000000000 ffffffff81007490 ffff8801b45f7f10 ffffffff81013c3d ffff8801b45f6010 ffffffff81cc7f48 ffff8801b45f7f40 ffffffff8100b0c7 ffffffff81007b19 afec8dc6140892fe 0000000000000000 0000000000000000 ffff8801b45f7f50 ffffffff815f523d 0000000000000000 Call Trace: [<ffffffff810013aa>] hypercall_page+0x3aa <-- [<ffffffff81007490>] xen_safe_halt+0x10 [<ffffffff81013c3d>] default_idle+0x5d [<ffffffff8100b0c7>] cpu_idle+0xb7 [<ffffffff81007b19>] xen_irq_enable_direct_reloc+0x4 [<ffffffff815f523d>] cpu_bringup_and_idle+0xe VCPU6 rip: ffffffff810013aa hypercall_page+0x3aa flags: 00001246 i z p rsp: 
ffff8801b4601ed8 rax: 0000000000000000 rcx: ffffffff810013aa rdx: 0000000000000000 rbx: ffff8801b4600010 rsi: 0000000000000000 rdi: 0000000000000001 rbp: ffff8801b4601ef0 r8: 0000000000000000 r9: 0000000000000000 r10: 0000000000000000 r11: 0000000000000246 r12: 0000000000000006 r13: 0000000000000000 r14: 0000000000000000 r15: 0000000000000000 cs: e033 ss: e02b ds: 002b es: 002b fs: 0000 @ 00007ff5deb9b700 gs: 0000 @ ffff8801bff76000/0000000000000000 cr0: 8005003b cr2: 7faf5345dff8 cr3: 10160000 cr4: 00002660 dr0: 00000000 dr1: 00000000 dr2: 00000000 dr3: 00000000 dr6: ffff0ff0 dr7: 00000400 Code (instr addr ffffffff810013aa) cc cc cc cc cc cc cc cc cc cc cc 51 41 53 b8 1d 00 00 00 0f 05 <41> 5b 59 c3 cc cc cc cc cc cc cc Stack: 0000000000000000 0000000000000000 ffffffff81007490 ffff8801b4601f10 ffffffff81013c3d ffff8801b4600010 ffffffff81cc7f48 ffff8801b4601f40 ffffffff8100b0c7 ffffffff81007b19 f6fcdef4e1be86f4 0000000000000000 0000000000000000 ffff8801b4601f50 ffffffff815f523d 0000000000000000 Call Trace: [<ffffffff810013aa>] hypercall_page+0x3aa <-- [<ffffffff81007490>] xen_safe_halt+0x10 [<ffffffff81013c3d>] default_idle+0x5d [<ffffffff8100b0c7>] cpu_idle+0xb7 [<ffffffff81007b19>] xen_irq_enable_direct_reloc+0x4 [<ffffffff815f523d>] cpu_bringup_and_idle+0xe VCPU7 rip: ffffffff810013aa hypercall_page+0x3aa flags: 00001246 i z p rsp: ffff8801b4603ed8 rax: 0000000000000000 rcx: ffffffff810013aa rdx: 0000000000000000 rbx: ffff8801b4602010 rsi: 0000000000000000 rdi: 0000000000000001 rbp: ffff8801b4603ef0 r8: 0000000000000000 r9: 0000000000000000 r10: 0000000000000000 r11: 0000000000000246 r12: 0000000000000007 r13: 0000000000000000 r14: 0000000000000000 r15: 0000000000000000 cs: e033 ss: e02b ds: 002b es: 002b fs: 0000 @ 00007fe081751700 gs: 0000 @ ffff8801bff91000/0000000000000000 cr0: 8005003b cr2: 7faf54bdfde0 cr3: 29c76000 cr4: 00002660 dr0: 00000000 dr1: 00000000 dr2: 00000000 dr3: 00000000 dr6: ffff0ff0 dr7: 00000400 Code (instr addr ffffffff810013aa) cc cc cc 
cc cc cc cc cc cc cc cc 51 41 53 b8 1d 00 00 00 0f 05 <41> 5b 59 c3 cc cc cc cc cc cc cc Stack: 0000000000000000 0000000000000000 ffffffff81007490 ffff8801b4603f10 ffffffff81013c3d ffff8801b4602010 ffffffff81cc7f48 ffff8801b4603f40 ffffffff8100b0c7 ffffffff81007b19 0c1d63da394a80a3 0000000000000000 0000000000000000 ffff8801b4603f50 ffffffff815f523d 0000000000000000 Call Trace: [<ffffffff810013aa>] hypercall_page+0x3aa <-- [<ffffffff81007490>] xen_safe_halt+0x10 [<ffffffff81013c3d>] default_idle+0x5d [<ffffffff8100b0c7>] cpu_idle+0xb7 [<ffffffff81007b19>] xen_irq_enable_direct_reloc+0x4 [<ffffffff815f523d>] cpu_bringup_and_idle+0xe ** Affects: linux-lts-backport-oneiric (Ubuntu) Importance: Undecided Status: New -- You received this bug notification because you are a member of Ubuntu Bugs, which is subscribed to Ubuntu. https://bugs.launchpad.net/bugs/1011792 Title: Scheduler deadlock running 3.0.0-20-virtual on c1.xlarge EC2 instance To manage notifications about this bug go to: https://bugs.launchpad.net/ubuntu/+source/linux-lts-backport-oneiric/+bug/1011792/+subscriptions -- ubuntu-bugs mailing list ubuntu-bugs@lists.ubuntu.com https://lists.ubuntu.com/mailman/listinfo/ubuntu-bugs