On Thu 08/06/2017 16:55, Martin Pieuchot wrote: > On 07/06/17(Wed) 09:43, Björn Ketelaars wrote: > > On Sat 03/06/2017 08:44, Björn Ketelaars wrote: > > > > > > Reverting back to the previous kernel fixed the issue above. Question: can > > > someone give a hint on how to track this issue? > > > > After a bit of experimenting I'm able to reproduce the problem. Summary is > > that queueing in pf and use of a current (after May 30), multi processor > > kernel (bsd.mp from snapshots) causes these specific watchdog timeouts > > followed by a system freeze. > > > > Issue is 'gone' when: > > 1.) using an older kernel (before May 30); > > 2.) removal of queueing statements from pf.conf. Included below the specific > > snippet; > > 3.) switch from MP kernel to SP kernel. > > > > New observation is that while queueing, using a MP kernel, the download > > bandwidth is only a fraction of what is expected. Exchanging the MP kernel > > with a SP kernel restores the download bandwidth to expected level. > > > > I'm guessing that this issue is related to recent work on PF? > > It's certainly a problem in, or exposed by, re(4) with the recent MP work > in the network stack. > > It would help if you could build a kernel with MP_LOCKDEBUG defined and > see if the resulting kernel enters ddb(4) instead of freezing. > > Thanks, > Martin
Thanks for the hint! It helped in entering ddb. I collected a bit of output, which you can find below. If I read the trace correctly the crash is related to line 1750 of sys/dev/ic/re.c: d->rl_cmdstat |= htole32(RL_TDESC_CMD_EOF); ddb{0}> show panic the kernel did not panic ddb{0}> trace db_enter(ffffffff8196acc0,ffff80002200b8b8,ffff8000ffffe6a8,10,ffff800022023c58 ,282) at db_enter+0x9 x86_ipi_handler(ffffffff8196acd0,ffffffff819f26a0,14a2d1,c,ffff800022023cd0,fff fffff819f95a0) at x86_ipi_handler+0x85 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x1c --- interrupt --- ___mp_lock(ffffffff819f26a0,ffffffff818552f0,1,1,ffff80002200b8b8,ffff8000ffffe 6a8) at ___mp_lock+0x4a ___mp_acquire_count(ffffffff819f26a0,1,ffff80002200b8b8,ffff8000ffffe6a8,ffffff ff81a11680,ffffffff81707182) at ___mp_acquire_count+0x33 mi_switch(0,0,ffff8000ffffe6a8,ffff800022023ed0,ffff8000ffffe6a8,ffff800022023e c0) at mi_switch+0x22b sleep_finish(ffff800022023ed0,1,ffffffff810b7110,ffff800022023ed0,0,0) at sleep _finish+0xc2 softclock_thread(ffff8000ffffe6a8,2a2,0,0,29e123d3cabb9a12,ffff800022023f10) at softclock_thread+0x94 end trace frame: 0x0, count: -8 ddb{0}> machine ddbcpu 1 Stopped at re_encap+0x24d: movl 0(%r15),%eax ddb{1}> show panic the kernel did not panic ddb{1}> trace re_encap(ffff800000084000,363,ffffff00d221cc00,ffff8000000842c0,400,ffff8000000 84000) at re_encap+0x24d re_start(ffff8000000842c0,7,ffffff00d171d000,ffff800000084338,ffff8000000842c0, ffffffff815fdbf0) at re_start+0x8c ifq_serialize(ffff8000000842c0,ffff800000084360,ffff800000084090,ffffff00d2fbd0 00,ffff8000000842c0,0) at ifq_serialize+0xf8 if_enqueue(ffff800000084090,ffffff00d2fbd000,ffff80000096a240,ffffff00d2fbd000, 37,2) at if_enqueue+0x90 ether_output(ffff800000084090,ffffff00d171d000,ffff80000096a240,ffffff011b66777 8,ffff8000004d1000,ffff80000096a240) at ether_output+0x1d6 ip_output(ffffff00d171d000,0,ffff800022032cc0,1,0,0) at ip_output+0x8a1 ip_forward(ffffff00d171d000,ffff8000002a4090,0,0,8244c78a,8244c78a) at ip_forwa rd+0x1e7 ipv4_input(ffff8000002a4090,ffffff00d171d000,800,800,ffffff000990ff80,ffff80000 02a4090) at ipv4_input+0x4f7 ether_input(ffff8000002a4090,ffffff00d171d000,0,ffffffff810afed0,ffff8000002a42 88,ffff8000002a4090) at ether_input+0xbd if_input_process(2,ffff800022032eb0,0,0,ffff800022032eb0,ffff800000019080) at i f_input_process+0xfa taskq_thread(ffff800000019080,2a2,ffff800000019080,ffffffff81493240,0,ffff80002 2032f10) at taskq_thread+0x79 end trace frame: 0x0, count: -11 ddb{1}> dmesg OpenBSD 6.1-current (GENERIC.MP) #7: Thu Jun 8 19:10:36 CEST 2017 b...@gateway.lan:/home/code/src/sys/arch/amd64/compile/GENERIC.MP real mem = 4245995520 (4049MB) avail mem = 4111519744 (3921MB) mpath0 at root scsibus0 at mpath0: 256 targets mainbus0 at root bios0 at mainbus0: SMBIOS rev. 2.7 @ 0xdf16d820 (6 entries) bios0: vendor coreboot version "SageBios_PCEngines_APU-45" date 04/05/2014 bios0: PC Engines APU acpi0 at bios0: rev 0 acpi0: sleep states S0 S1 S3 S4 S5 acpi0: tables DSDT FACP SPCR HPET APIC HEST SSDT SSDT SSDT acpi0: wakeup devices AGPB(S4) HDMI(S4) PBR4(S4) PBR5(S4) PBR6(S4) PBR7(S4) PE2 0(S4) PE21(S4) PE22(S4) PE23(S4) PIBR(S4) UOH1(S3) UOH2(S3) UOH3(S3) UOH4(S3) U OH5(S3) [...] acpitimer0 at acpi0: 3579545 Hz, 32 bits acpihpet0 at acpi0: 14318180 Hz acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat cpu0 at mainbus0: apid 0 (boot processor) cpu0: AMD G-T40E Processor, 1000.13 MHz cpu0: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,C FLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,MWAIT,SSSE3,CX16,POPCNT,NXE,MMXX,FFXSR,PAGE1GB ,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,IBS,SKINIT,IT SC cpu0: 32KB 64b/line 2-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 1 6-way L2 cache cpu0: 8 4MB entries fully associative cpu0: DTLB 40 4KB entries fully associative, 8 4MB entries fully associative cpu0: TSC frequency 1000129460 Hz cpu0: smt 0, core 0, package 0 mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges cpu0: apic clock running at 199MHz cpu0: mwait min=64, max=64, IBE cpu1 at mainbus0: apid 1 (application processor) cpu1: AMD G-T40E Processor, 1000.00 MHz cpu1: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,C FLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,MWAIT,SSSE3,CX16,POPCNT,NXE,MMXX,FFXSR,PAGE1GB ,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,IBS,SKINIT,IT SC cpu1: 32KB 64b/line 2-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 64b/line 1 6-way L2 cache cpu1: 8 4MB entries fully associative cpu1: DTLB 40 4KB entries fully associative, 8 4MB entries fully associative cpu1: smt 0, core 1, package 0 ioapic0 at mainbus0: apid 2 pa 0xfec00000, version 21, 24 pins acpiprt0 at acpi0: bus -1 (AGPB) acpiprt1 at acpi0: bus -1 (HDMI) acpiprt2 at acpi0: bus 1 (PBR4) acpiprt3 at acpi0: bus 2 (PBR5) acpiprt4 at acpi0: bus 3 (PBR6) acpiprt5 at acpi0: bus -1 (PBR7) acpiprt6 at acpi0: bus 5 (PE20) acpiprt7 at acpi0: bus -1 (PE21) acpiprt8 at acpi0: bus -1 (PE22) acpiprt9 at acpi0: bus -1 (PE23) acpiprt10 at acpi0: bus 0 (PCI0) acpiprt11 at acpi0: bus 4 (PIBR) acpicpu0 at acpi0: C2(0@100 io@0x841), C1(@1 halt!), PSS acpicpu1 at acpi0: C2(0@100 io@0x841), C1(@1 halt!), PSS acpibtn0 at acpi0: PWRB cpu0: 1000 MHz: speeds: 1000 800 MHz pci0 at mainbus0 bus 0 pchb0 at pci0 dev 0 function 0 "AMD AMD64 14h Host" rev 0x00 ppb0 at pci0 dev 4 function 0 "AMD AMD64 14h PCIE" rev 0x00: msi pci1 at ppb0 bus 1 re0 at pci1 dev 0 function 0 "Realtek 8168" rev 0x06: RTL8168E/8111E (0x2c00), m si, address 00:0d:b9:33:8e:a4 rgephy0 at re0 phy 7: RTL8169S/8110S/8211 PHY, rev. 4 ppb1 at pci0 dev 5 function 0 "AMD AMD64 14h PCIE" rev 0x00: msi pci2 at ppb1 bus 2 re1 at pci2 dev 0 function 0 "Realtek 8168" rev 0x06: RTL8168E/8111E (0x2c00), m si, address 00:0d:b9:33:8e:a5 rgephy1 at re1 phy 7: RTL8169S/8110S/8211 PHY, rev. 4 ppb2 at pci0 dev 6 function 0 "AMD AMD64 14h PCIE" rev 0x00: msi pci3 at ppb2 bus 3 re2 at pci3 dev 0 function 0 "Realtek 8168" rev 0x06: RTL8168E/8111E (0x2c00), m si, address 00:0d:b9:33:8e:a6 rgephy2 at re2 phy 7: RTL8169S/8110S/8211 PHY, rev. 4 ahci0 at pci0 dev 17 function 0 "ATI SBx00 SATA" rev 0x40: apic 2 int 19, AHCI 1 .2 ahci0: port 0: 6.0Gb/s scsibus1 at ahci0: 32 targets sd0 at scsibus1 targ 0 lun 0: <ATA, SAMSUNG SSD PM83, CXM1> SCSI3 0/direct fixe d naa.5002538043584d30 sd0: 30533MB, 512 bytes/sector, 62533296 sectors, thin ohci0 at pci0 dev 18 function 0 "ATI SB700 USB" rev 0x00: apic 2 int 18, versio n 1.0, legacy support ehci0 at pci0 dev 18 function 2 "ATI SB700 USB2" rev 0x00: apic 2 int 17 usb0 at ehci0: USB revision 2.0 uhub0 at usb0 configuration 1 interface 0 "ATI EHCI root hub" rev 2.00/1.00 add r 1 ohci1 at pci0 dev 19 function 0 "ATI SB700 USB" rev 0x00: apic 2 int 18, versio n 1.0, legacy support ehci1 at pci0 dev 19 function 2 "ATI SB700 USB2" rev 0x00: apic 2 int 17 usb1 at ehci1: USB revision 2.0 uhub1 at usb1 configuration 1 interface 0 "ATI EHCI root hub" rev 2.00/1.00 add r 1 piixpm0 at pci0 dev 20 function 0 "ATI SBx00 SMBus" rev 0x42: polling iic0 at piixpm0 pcib0 at pci0 dev 20 function 3 "ATI SB700 ISA" rev 0x40 ppb3 at pci0 dev 20 function 4 "ATI SB600 PCI" rev 0x40 pci4 at ppb3 bus 4 ohci2 at pci0 dev 20 function 5 "ATI SB700 USB" rev 0x00: apic 2 int 18, versio n 1.0, legacy support ppb4 at pci0 dev 21 function 0 "ATI SB800 PCIE" rev 0x00 pci5 at ppb4 bus 5 ohci3 at pci0 dev 22 function 0 "ATI SB700 USB" rev 0x00: apic 2 int 18, versio n 1.0, legacy support ehci2 at pci0 dev 22 function 2 "ATI SB700 USB2" rev 0x00: apic 2 int 17 usb2 at ehci2: USB revision 2.0 uhub2 at usb2 configuration 1 interface 0 "ATI EHCI root hub" rev 2.00/1.00 add r 1 pchb1 at pci0 dev 24 function 0 "AMD AMD64 14h Link Cfg" rev 0x43 pchb2 at pci0 dev 24 function 1 "AMD AMD64 14h Address Map" rev 0x00 pchb3 at pci0 dev 24 function 2 "AMD AMD64 14h DRAM Cfg" rev 0x00 km0 at pci0 dev 24 function 3 "AMD AMD64 14h Misc Cfg" rev 0x00 pchb4 at pci0 dev 24 function 4 "AMD AMD64 14h CPU Power" rev 0x00 pchb5 at pci0 dev 24 function 5 "AMD AMD64 14h Reserved" rev 0x00 pchb6 at pci0 dev 24 function 6 "AMD AMD64 14h NB Power" rev 0x00 pchb7 at pci0 dev 24 function 7 "AMD AMD64 14h Reserved" rev 0x00 usb3 at ohci0: USB revision 1.0 uhub3 at usb3 configuration 1 interface 0 "ATI OHCI root hub" rev 1.00/1.00 add r 1 usb4 at ohci1: USB revision 1.0 uhub4 at usb4 configuration 1 interface 0 "ATI OHCI root hub" rev 1.00/1.00 add r 1 isa0 at pcib0 isadma0 at isa0 com0 at isa0 port 0x3f8/8 irq 4: ns16550a, 16 byte fifo com0: console com1 at isa0 port 0x2f8/8 irq 3: ns16550a, 16 byte fifo pcppi0 at isa0 port 0x61 spkr0 at pcppi0 lpt0 at isa0 port 0x378/4 irq 7 wbsio0 at isa0 port 0x2e/2: NCT5104D rev 0x52 usb5 at ohci2: USB revision 1.0 uhub5 at usb5 configuration 1 interface 0 "ATI OHCI root hub" rev 1.00/1.00 add r 1 usb6 at ohci3: USB revision 1.0 uhub6 at usb6 configuration 1 interface 0 "ATI OHCI root hub" rev 1.00/1.00 add r 1 vmm0 at mainbus0: SVM/RVI umass0 at uhub2 port 1 configuration 1 interface 0 "Generic Flash Card Reader/W riter" rev 2.01/1.00 addr 2 umass0: using SCSI over Bulk-Only scsibus2 at umass0: 2 targets, initiator 0 sd1 at scsibus2 targ 1 lun 0: <Multiple, Card Reader, 1.00> SCSI2 0/direct remo vable serial.058f6366058F63666485 sd1: 7600MB, 512 bytes/sector, 15564800 sectors vscsi0 at root scsibus3 at vscsi0: 256 targets softraid0 at root scsibus4 at softraid0: 256 targets root on sd0a (28fcdc10008babff.a) swap on sd0b dump on sd0b re1: watchdog timeout re1: watchdog timeout re1: watchdog timeout re1: watchdog timeout re1: watchdog timeout re1: watchdog timeout re0: watchdog timeout re1: watchdog timeout re1: watchdog timeout re0: watchdog timeout re1: watchdog timeout uvm_fault(0xffffffff81a10fc0, 0x400, 0, 1) -> e kernel: page fault trap, code=0 Stopped at re_encap+0x24d: movl 0(%r15),%eax ddb{1}> ps PID TID PPID UID S FLAGS WAIT COMMAND 82195 369179 56172 0 3 0x83 poll systat 56172 147644 79847 1000 3 0x10008b pause ksh 79847 461268 7799 1000 3 0x90 select sshd 7799 55628 13498 0 3 0x92 poll sshd 61811 278207 1 0 3 0x100083 ttyin getty 74015 14393 1 0 3 0x100098 poll cron 59960 272647 1 0 3 0x80 kqread apmd 87200 406555 1 109 3 0x90 kqread ftp-proxy 45038 23411 15230 95 3 0x100092 kqread smtpd 93110 210830 15230 103 3 0x100092 kqread smtpd 73423 112738 15230 95 3 0x100092 kqread smtpd 68375 247337 15230 95 3 0x100092 kqread smtpd 27674 140042 15230 95 3 0x100092 kqread smtpd 48015 17028 15230 95 3 0x100092 kqread smtpd 15230 242238 1 0 3 0x100080 kqread smtpd 12034 314719 1 92 3 0x100090 kqread rtadvd 18251 517215 1 77 3 0x100090 poll dhcpd 13498 267120 1 0 3 0x80 select sshd 95968 319521 1 0 3 0x100080 poll ntpd 52955 506732 85253 83 3 0x100092 poll ntpd 85253 410399 1 83 3 0x100092 poll ntpd 60302 14142 1 53 3 0x90 kqread unbound 73871 165819 5712 74 3 0x100090 bpf pflogd 5712 338263 1 0 3 0x80 netio pflogd 29839 479602 54341 73 2 0x100090 syslogd 54341 102467 1 0 3 0x100082 netio syslogd 53017 285789 1 77 3 0x100090 poll dhclient 74581 122082 1 0 3 0x80 poll dhclient 54066 116457 74298 115 3 0x100092 kqread slaacd 64846 12763 74298 115 3 0x100092 kqread slaacd 74298 260076 1 0 3 0x80 kqread slaacd 64842 392827 1 0 3 0x80 mfsidl mount_mfs 51405 232671 0 0 3 0x14200 pgzero zerothread 48871 331472 0 0 3 0x14200 aiodoned aiodoned 34037 13488 0 0 3 0x14200 syncer update 91469 431877 0 0 3 0x14200 cleaner cleaner 4505 464914 0 0 3 0x14200 reaper reaper 58806 32124 0 0 3 0x14200 pgdaemon pagedaemon 11816 283378 0 0 3 0x14200 bored crynlk 20420 416822 0 0 3 0x14200 bored crypto 42218 73014 0 0 3 0x14200 pftm pfpurge 23356 409989 0 0 3 0x14200 bored sensors 75378 434260 0 0 3 0x14200 usbtsk usbtask 42666 210924 0 0 3 0x14200 usbatsk usbatsk 3299 130961 0 0 3 0x40014200 acpi0 acpi0 24358 424982 0 0 3 0x40014200 idle1 *90228 269751 0 0 7 0x14200 softnet 11942 439335 0 0 3 0x14200 bored systqmp 42690 165546 0 0 3 0x14200 bored systq 98103 311380 0 0 7 0x40014200 softclock 75229 252289 0 0 3 0x40014200 idle0 65336 512854 0 0 3 0x14200 bored sbar 1 38293 0 0 3 0x82 wait init 0 0 -1 0 3 0x10200 scheduler swapper ddb{1}> show uvm Current UVM status: pagesize=4096 (0x1000), pagemask=0xfff, pageshift=12 1005861 VM pages: 145661 active, 2430 inactive, 1 wired, 806399 free (106967 z ero) min 10% (25) anon, 10% (25) vnode, 5% (12) vtext pages 0 anon, 0 vnode, 0 vtext freemin=33528, free-target=44704, inactive-target=0, wired-max=335287 faults=524182, traps=507313, intrs=368594, ctxswitch=212365 fpuswitch=198 softint=62364, syscalls=818564, kmapent=15 fault counts: noram=0, noanon=0, noamap=0, pgwait=0, pgrele=0 ok relocks(total)=6921(6921), anget(retries)=193190(0), amapcopy=349847 neighbor anon/obj pg=14074/136541, gets(lock/unlock)=48468/6921 cases: anon=164917, anoncow=28273, obj=41539, prcopy=6929, przero=282512 daemon and swap counts: woke=0, revs=0, scans=0, obscans=0, anscans=0 busy=0, freed=0, reactivate=0, deactivate=0 pageouts=0, pending=0, nswget=0 nswapdev=0, nanon=0, nanonneeded=0 nfreeanon=0 swpages=0, swpginuse=0, swpgonly=0 paging=0 kernel pointers: objs(kern)=0xffffffff819d3d80 ddb{1}> show bcstats Current Buffer Cache status: numbufs 6896 busymapped 1, delwri 5 kvaslots 6553 avail kva slots 6552 bufpages 27498, dmapages 27498, dirtypages 20 pendingreads 0, pendingwrites 0 highflips 0, highflops 0, dmaflips 0 ddb{1}> show registers rdi 0xffffffff81974840 pci_bus_dma_tag rsi 0 rbp 0xffff800022032900 rbx 0xffff8000000842c0 rdx 0 rcx 0xffff800000240800 rax 0xffffff00d221cc00 r8 0x4 r9 0 r10 0x2361a6a100 r11 0xffffffff8139e930 x86_bus_space_mem_read_4 r12 0x3630 __ALIGN_SIZE+0x2630 r13 0x363 hib_hlt_real+0x5e r14 0xffff800000084000 r15 0x400 hib_hlt_real+0xfb rip 0xffffffff812757fd re_encap+0x24d cs 0x8 rflags 0x10216 mptramp_longmode+0x16e rsp 0xffff800022032760 ss 0x10 re_encap+0x24d: movl 0(%r15),%eax ddb{1}> boot reboot panic: rw_enter: netlock locking against myself Stopped at db_enter+0x9: leave TID PID UID PRFLAGS PFLAGS CPU COMMAND *269751 90228 0 0x14000 0x200 1 softnet 311380 98103 0 0x14000 0x40000200 0 softclock db_enter(ffffffff8172639e,ffff800022032130,10,ffff800022032110,282,8) at db_ent er+0x9 panic(ffffffff817ad478,14,ffff8000ffffed50,ffffffff8196a2b0,ffff8000ffffed54,ff ff800022032240) at panic+0x102 _rw_enter(ffffffff8196a2b0,1,ffff80002202e000,0,ffffffff8197dea0,ffff8000220324 18) at _rw_enter+0x20f if_downall(ffffffff8197dea0,ffff800022032418,4804,ffff8000220323b8,29e123d3cabb 9a12,0) at if_downall+0x2d boot(4804,ffffffff81945120,4804,ffffffff8197dea0,ffff800022032418,4804) at boot +0xeb reboot(4804,5,0,ffff8000220323f0,ffff8000220323b8,d) at reboot+0x37 db_boot_reboot_cmd(ffffffff812757fd,0,ffffffffffffffff,ffff800022032420,0,29e12 3d3cabb9a12) at db_boot_reboot_cmd+0x22 db_command(ffffffff8197dea0,ffffffff8197dae0,ffff800022032690,6,0,78) at db_com mand+0x13a db_command_loop(29e123d3cabb9a12,0,ffffffff8165620f,ffff80002202e000,0,ffff8000 22032690) at db_command_loop+0xb0 db_trap(6,0,7,29e123d3cabb9a12,813b34c7,ffffffff813c2144) at db_trap+0xf4 db_ktrap(6,0,ffff800022032690,1,ffff800022032690,ffff8000ffffed50) at db_ktrap+ 0x10f trap() at trap+0x16d --- trap (number 6) --- re_encap(ffff800000084000,363,ffffff00d221cc00,ffff8000000842c0,400,ffff8000000 84000) at re_encap+0x24d re_start(ffff8000000842c0,7,ffffff00d171d000,ffff800000084338,ffff8000000842c0, ffffffff815fdbf0) at re_start+0x8c end trace frame: 0xffff8000220329c0, count: 0 https://www.openbsd.org/ddb.html describes the minimum info required in bug reports. Insufficient info makes it difficult to find and fix bugs. -- Björn Ketelaars GPG key: 0x4F0E5F21