Ok, here you go ... In approx. 4 out of 10 times the boot process does not conclude.
First variant: ... scsibus2 at vscsi0: 256 targets softraid0 at root scsibus3 at softraid0: 256 targets root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b gen6 pad1 0 0 pad1 4095 ffff8000 pad2 0 81c7a0 pad2 4095 0 kernel: protection fault trap, code=0 Stopped at i915_vma_pin+0x167: testl %r15d,0x160(%rax) ddb{1}> trace i915_vma_pin(ffff80000081c780,0,10000,420) at i915_vma_pin+0x167 i915_ggtt_pin(ffff80000081c780,10000,20) at i915_ggtt_pin+0x91 gen6_ppgtt_pin(ffff80000080f000) at gen6_ppgtt_pin+0xd7 __intel_context_do_pin(fffffd817e14ed80) at __intel_context_do_pin+0xca intel_engines_init(ffff800000104c38) at intel_engines_init+0x4b5 intel_gt_init(ffff800000104c38) at intel_gt_init+0x130 i915_gem_init(ffff800000100000) at i915_gem_init+0xa3 i915_driver_probe(ffff800000100000,ffffffff81f14df0) at i915_driver_probe+0x7ed inteldrm_attachhook(ffff800000100000) at inteldrm_attachhook+0x43 config_process_deferred_mountroot() at config_process_deferred_mountroot+0x6b main(0) at main+0x733 end trace frame: 0x0, count: -11 ddb{1}> mach ddbcpu 0 Stopped at x86_ipi_db+0x12: leave ddb{0}> trace x86_ipi_db(ffffffff820f7ff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 _kernel_lock() at _kernel_lock+0xb2 softintr_dispatch(0) at softintr_dispatch+0x4a Xsoftclock() at Xsoftclock+0x1f acpicpu_idle() at acpicpu_idle+0x1ea sched_idle(ffffffff820f7ff0) at sched_idle+0x27e end trace frame: 0x0, count: -8 ddb{0}> mach ddb cpu 2 Symbol not found ddb{0}> mach ddbcpu 2 Stopped at x86_ipi_db+0x12: leave ddb{2}> trace x86_ipi_db(ffff80001ff42ff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 acpicpu_idle() at acpicpu_idle+0x1ea sched_idle(ffff80001ff42ff0) at sched_idle+0x27e end trace frame: 0x0, count: -5 ddb{2}> mach ddbcpu 3 Stopped at x86_ipi_db+0x12: leave ddb{3}> trace x86_ipi_db(ffff80001ff4bff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 mtx_enter_try(ffffffff822418a8) at mtx_enter_try+0x84 mtx_enter(ffffffff822418a8) at mtx_enter+0x35 uvm_pagezero_thread(ffff800020259a40) at uvm_pagezero_thread+0x74 end trace frame: 0x0, count: -6 ddb{3}> mach ddbcpu 1 Stopped at i915_vma_pin+0x167: testl %r15d,0x160(%rax) ddb{1}> ps PID TID PPID UID S FLAGS WAIT COMMAND 57276 406435 0 0 3 0x14200 bored i915-userptr-acq 18828 410208 0 0 3 0x14200 bored i915_flip 72532 503959 0 0 3 0x14200 bored i915_modeset 26696 414393 0 0 3 0x14200 bored i915-dp 76284 351576 0 0 3 0x14200 bored i915 27501 406247 0 0 3 0x14200 bored smr 24358 292417 0 0 7 0x14200 zerothread 58546 370476 0 0 3 0x14200 aiodoned aiodoned 69702 385016 0 0 3 0x14200 syncer update 33778 379219 0 0 3 0x14200 cleaner cleaner 29467 427921 0 0 3 0x14200 reaper reaper 71585 17515 0 0 3 0x14200 pgdaemon pagedaemon 38422 37499 0 0 3 0x14200 bored crynlk 96968 404247 0 0 3 0x14200 bored crypto 85767 451266 0 0 3 0x14200 usbtsk usbtask 64966 267267 0 0 3 0x14200 usbatsk usbatsk 56027 154673 0 0 3 0x14200 bored drmtskl 69848 454575 0 0 3 0x14200 bored drmlwq 90459 232289 0 0 3 0x14200 bored drmlwq 7935 270871 0 0 3 0x14200 bored drmlwq 34417 347812 0 0 3 0x14200 bored drmlwq 40915 390021 0 0 3 0x14200 bored drmubwq 35151 389374 0 0 3 0x14200 bored drmubwq 57449 443683 0 0 3 0x14200 bored drmubwq 71227 313856 0 0 3 0x14200 bored drmubwq 49029 510183 0 0 3 0x14200 bored drmhpwq 74799 162896 0 0 3 0x14200 bored drmhpwq 1459 354055 0 0 3 0x14200 bored drmhpwq 64384 453576 0 0 3 0x14200 bored drmhpwq 30147 158249 0 0 3 0x14200 bored drmwq 31840 434648 0 0 3 0x14200 bored drmwq 48853 206487 0 0 3 0x14200 bored drmwq 45954 355302 0 0 3 0x14200 bored drmwq 63591 457879 0 0 3 0x40014200 acpi0 acpi0 34899 137527 0 0 1 0x14200 idle3 42295 187703 0 0 7 0x40014200 idle2 5356 154045 0 0 3 0x40014200 idle1 60193 384404 0 0 3 0x14200 bored sensors 59943 96119 0 0 3 0x14200 bored softnet 94208 343230 0 0 3 0x14200 bored systqmp 75880 413625 0 0 3 0x14200 bored systq 32713 128466 0 0 3 0x40014200 bored softclock 16101 240213 0 0 7 0x40014200 idle0 1 275798 0 0 3 0 initexec swapper * 0 0 -1 0 7 0x10200 swapper ddb{1}> show uvm Current UVM status: pagesize=4096 (0x1000), pagemask=0xfff, pageshift=12 971041 VM pages: 0 active, 0 inactive, 20 wired, 963032 free (7664 zero) min 10% (25) anon, 10% (25) vnode, 5% (12) vtext freemin=32368, free-target=43157, inactive-target=0, wired-max=323680 faults=0, traps=3, intrs=6440, ctxswitch=166 fpuswitch=0 softint=137, syscalls=0, kmapent=12 fault counts: noram=0, noanon=0, noamap=0, pgwait=0, pgrele=0 ok relocks(total)=0(0), anget(retries)=0(0), amapcopy=0 neighbor anon/obj pg=0/0, gets(lock/unlock)=0/0 cases: anon=0, anoncow=0, obj=0, prcopy=0, przero=0 daemon and swap counts: woke=0, revs=0, scans=0, obscans=0, anscans=0 busy=0, freed=0, reactivate=0, deactivate=0 pageouts=0, pending=0, nswget=0 nswapdev=1 swpages=1068660, swpginuse=0, swpgonly=0 paging=0 kernel pointers: objs(kern)=0xffffffff821ac878 ddb{1}> show bcstats Current Buffer Cache status: numbufs 2 busymapped 0, delwri 0 kvaslots 5964 avail kva slots 5964 bufpages 5, dmapages 5, dirtypages 0 pendingreads 0, pendingwrites 0 highflips 0, highflops 0, dmaflips 0 ddb{1}> Second variant: ... scsibus2 at vscsi0: 256 targets softraid0 at root scsibus3 at softraid0: 256 targets root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b gen6 pad1 0 0 pad1 4095 ffff8000 pad2 0 8187a0 pad2 4095 0 kernel: protection fault trap, code=0 Stopped at i915_vma_pin+0x167: testl %r15d,0x160(%rax) ddb{1}> trace i915_vma_pin(ffff800000818780,0,10000,420) at i915_vma_pin+0x167 i915_ggtt_pin(ffff800000818780,10000,20) at i915_ggtt_pin+0x91 gen6_ppgtt_pin(ffff80000080e000) at gen6_ppgtt_pin+0xd7 __intel_context_do_pin(fffffd817e160d80) at __intel_context_do_pin+0xca intel_engines_init(ffff800000104c38) at intel_engines_init+0x4b5 intel_gt_init(ffff800000104c38) at intel_gt_init+0x130 i915_gem_init(ffff800000100000) at i915_gem_init+0xa3 i915_driver_probe(ffff800000100000,ffffffff81fbff40) at i915_driver_probe+0x7ed inteldrm_attachhook(ffff800000100000) at inteldrm_attachhook+0x43 config_process_deferred_mountroot() at config_process_deferred_mountroot+0x6b main(0) at main+0x733 end trace frame: 0x0, count: -11 ddb{1}> mach ddbcpu 0 Stopped at x86_ipi_db+0x12: leave ddb{0}> trace x86_ipi_db(ffffffff820f3ff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 _kernel_lock() at _kernel_lock+0xb2 softintr_dispatch(0) at softintr_dispatch+0x4a Xsoftclock() at Xsoftclock+0x1f acpicpu_idle() at acpicpu_idle+0x1ea sched_idle(ffffffff820f3ff0) at sched_idle+0x27e end trace frame: 0x0, count: -8 ddb{0}> mach ddbcpu 2 Stopped at x86_ipi_db+0x12: leave ddb{2}> trace x86_ipi_db(ffff80001ff42ff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 pagezero() at pagezero+0x29 end trace frame: 0x0, count: -4 ddb{2}> mach ddbcpu 3 Stopped at x86_ipi_db+0x12: leave ddb{3}> trace x86_ipi_db(ffff80001ff4bff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 acpicpu_idle() at acpicpu_idle+0x1ea sched_idle(ffff80001ff4bff0) at sched_idle+0x27e end trace frame: 0x0, count: -5 ddb{3}> mach ddbcpu 1 Stopped at i915_vma_pin+0x167: testl %r15d,0x160(%rax) ddb{1}> ps PID TID PPID UID S FLAGS WAIT COMMAND 57947 393667 0 0 3 0x14200 bored i915-userptr-acq 981 256282 0 0 3 0x14200 bored i915_flip 72134 168056 0 0 3 0x14200 bored i915_modeset 23700 230975 0 0 3 0x14200 bored i915-dp 21629 383627 0 0 3 0x14200 bored i915 21841 362736 0 0 3 0x14200 bored smr 95539 39484 0 0 7 0x14200 zerothread 33137 71718 0 0 3 0x14200 aiodoned aiodoned 34771 298806 0 0 3 0x14200 syncer update 70634 43467 0 0 3 0x14200 cleaner cleaner 59404 46135 0 0 3 0x14200 reaper reaper 41881 479280 0 0 3 0x14200 pgdaemon pagedaemon 4574 523029 0 0 3 0x14200 bored crynlk 12450 482595 0 0 3 0x14200 bored crypto 42426 256415 0 0 3 0x14200 usbtsk usbtask 93641 233033 0 0 3 0x14200 usbatsk usbatsk 77876 353886 0 0 3 0x14200 bored drmtskl 55832 245118 0 0 3 0x14200 bored drmlwq 79725 176992 0 0 3 0x14200 bored drmlwq 86580 323296 0 0 3 0x14200 bored drmlwq 12480 7554 0 0 3 0x14200 bored drmlwq 72464 509745 0 0 3 0x14200 bored drmubwq 20859 82297 0 0 3 0x14200 bored drmubwq 43431 60053 0 0 3 0x14200 bored drmubwq 69904 167702 0 0 3 0x14200 bored drmubwq 17767 438272 0 0 3 0x14200 bored drmhpwq 16087 458102 0 0 3 0x14200 bored drmhpwq 64274 437063 0 0 3 0x14200 bored drmhpwq 63524 468676 0 0 3 0x14200 bored drmhpwq 10231 61781 0 0 3 0x14200 bored drmwq 74996 96653 0 0 3 0x14200 bored drmwq 9420 373331 0 0 3 0x14200 bored drmwq 95965 442364 0 0 3 0x14200 bored drmwq 69608 20515 0 0 3 0x40014200 acpi0 acpi0 68858 32798 0 0 7 0x40014200 idle3 94100 274306 0 0 1 0x14200 idle2 30778 109612 0 0 3 0x40014200 idle1 45549 278157 0 0 3 0x14200 bored sensors 14889 481387 0 0 3 0x14200 bored softnet 51916 229545 0 0 3 0x14200 bored systqmp 39161 389488 0 0 3 0x14200 bored systq 98123 330029 0 0 3 0x40014200 bored softclock 18964 9085 0 0 7 0x40014200 idle0 1 492901 0 0 3 0 initexec swapper * 0 0 -1 0 7 0x10200 swapper ddb{1}> show uvm Current UVM status: pagesize=4096 (0x1000), pagemask=0xfff, pageshift=12 971039 VM pages: 0 active, 0 inactive, 20 wired, 963031 free (7664 zero) min 10% (25) anon, 10% (25) vnode, 5% (12) vtext freemin=32367, free-target=43156, inactive-target=0, wired-max=323679 faults=0, traps=3, intrs=44, ctxswitch=166 fpuswitch=0 softint=137, syscalls=0, kmapent=13 fault counts: noram=0, noanon=0, noamap=0, pgwait=0, pgrele=0 ok relocks(total)=0(0), anget(retries)=0(0), amapcopy=0 neighbor anon/obj pg=0/0, gets(lock/unlock)=0/0 cases: anon=0, anoncow=0, obj=0, prcopy=0, przero=0 daemon and swap counts: woke=0, revs=0, scans=0, obscans=0, anscans=0 busy=0, freed=0, reactivate=0, deactivate=0 pageouts=0, pending=0, nswget=0 nswapdev=1 swpages=1068660, swpginuse=0, swpgonly=0 paging=0 kernel pointers: objs(kern)=0xffffffff82168f20 ddb{1}> show bcstats Current Buffer Cache status: numbufs 2 busymapped 0, delwri 0 kvaslots 5964 avail kva slots 5964 bufpages 5, dmapages 5, dirtypages 0 pendingreads 0, pendingwrites 0 highflips 0, highflops 0, dmaflips 0 ddb{1}> Or the boot process stops like this: ... scsibus2 at vscsi0: 256 targets softraid0 at root scsibus3 at softraid0: 256 targets root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b or like this: ... scsibus2 at vscsi0: 256 targets softraid0 at root scsibus3 at softraid0: 256 targets root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b panic: rw_enter: (null) locking aga Btw, I frequently also get this "command error": ... wsdisplay0 at inteldrm0 mux 1 pckbd_enable: command error wsdisplay0: screen 0-5 added (std, vt100 emulation) ... On 2021-06-01 16:15, Jonathan Gray wrote: > On Mon, May 31, 2021 at 02:38:34PM +0200, Matthias Pressfreund wrote: >> After applying the diffs you sent on 2021-05-17 and 2021-05-27, I >> booted the new kernel which completed until the login prompt. On >> the way I got this: >> >> ... >> scsibus2 at vscsi0: 256 targets >> softraid0 at root >> scsibus3 at softraid0: 256 targets >> root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b >> >> i915_ggtt_pin called with NULL vma >> WARNING !list_empty(&dev->mode_config.connector_list) failed at >> /usr/src/sys/dev/pci/drm/drm_mode_config.c:487 >> [drm] *ERROR* connector DP-2 leaked! >> drm : drm_WARN_ON(d->wake_count)drm : drm_WARN_ON(d->wake_count)Device >> initialization failed (-22) >> WARNING ({ typeof(vblank->enabled) __tmp = *(volatile >> typeof(vblank->enabled) *)&(vblank->enabled); membar_datadep_consumer(); >> __tmp; }) && drm_core_check_feature(dev, DRIVER_MODESET) failed at >> /usr/src/sys/dev/pci/drm/drm_vblank.c:440 >> Automatic boot in progress: starting file system checks. >> /dev/sd0a (5e1040cb2dc494f4.a): file system is clean; not checking >> ... >> >> >> Then I rebooted a few times without problems. Then, this happened: >> >> ... >> scsibus2 at vscsi0: 256 targets >> softraid0 at root >> scsibus3 at softraid0: 256 targets >> root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b >> uvm_fault(0xffffffff8218aa20, 0xb9, 0, 1) -> e >> kernel: page fault trap, code=0 >> Stopped at i915_ggtt_pin+0x31: movq 0xb8(%rdi),%r12 >> ddb{0}> trace >> i915_ggtt_pin(1,10000,20) at i915_ggtt_pin+0x31 >> gen6_ppgtt_pin(ffff80000080f000) at gen6_ppgtt_pin+0x7c >> __intel_context_do_pin(fffffd817adb6d80) at __intel_context_do_pin+0xca >> intel_engines_init(ffff800000104c38) at intel_engines_init+0x4b5 >> intel_gt_init(ffff800000104c38) at intel_gt_init+0x130 >> i915_gem_init(ffff800000100000) at i915_gem_init+0xa3 >> i915_driver_probe(ffff800000100000,ffffffff8207c330) at >> i915_driver_probe+0x7ed >> >> inteldrm_attachhook(ffff800000100000) at inteldrm_attachhook+0x43 >> config_process_deferred_mountroot() at config_process_deferred_mountroot+0x6b > > I believe something is wrongly writing over the struct with the vma. > Can you drop the previous patches and try this? > > Index: sys/dev/pci/drm/i915/i915_vma.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/drm/i915/i915_vma.c,v > retrieving revision 1.5 > diff -u -p -r1.5 i915_vma.c > --- sys/dev/pci/drm/i915/i915_vma.c 11 Oct 2020 05:45:33 -0000 1.5 > +++ sys/dev/pci/drm/i915/i915_vma.c 1 Jun 2021 13:16:37 -0000 > @@ -484,7 +484,8 @@ void __iomem *i915_vma_pin_iomap(struct > err = agp_map_subregion(dev_priv->agph, vma->node.start, > vma->node.size, &vma->bsh); > if (err) { > - err = -err; > + printf("%s agp_map_subregion failed\n", __func__); > + err = -ENOMEM; > goto err; > } > ptr = bus_space_vaddr(dev_priv->bst, vma->bsh); > @@ -851,6 +852,13 @@ static int vma_get_pages(struct i915_vma > if (mutex_lock_interruptible(&vma->pages_mutex)) > return -EINTR; > > + if (((vaddr_t)vma->obj) < VM_MIN_KERNEL_ADDRESS) { > + printf("%s bad obj ptr %p\n", __func__, vma->obj); > + vma->obj = NULL; > + err = -EINVAL; > + goto unlock; > + } > + > if (!atomic_read(&vma->pages_count)) { > if (vma->obj) { > err = i915_gem_object_pin_pages(vma->obj); > @@ -1029,8 +1037,14 @@ static void flush_idle_contexts(struct i > > int i915_ggtt_pin(struct i915_vma *vma, u32 align, unsigned int flags) > { > - struct i915_address_space *vm = vma->vm; > + struct i915_address_space *vm; > int err; > + > + if (((vaddr_t)vma) < VM_MIN_KERNEL_ADDRESS) { > + printf("%s called with %p vma\n", __func__, vma); > + return -EINVAL; > + } > + vm = vma->vm; > > GEM_BUG_ON(!i915_vma_is_ggtt(vma)); > > Index: sys/dev/pci/drm/i915/gt/gen6_ppgtt.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/drm/i915/gt/gen6_ppgtt.c,v > retrieving revision 1.1 > diff -u -p -r1.1 gen6_ppgtt.c > --- sys/dev/pci/drm/i915/gt/gen6_ppgtt.c 8 Jun 2020 04:48:13 -0000 > 1.1 > +++ sys/dev/pci/drm/i915/gt/gen6_ppgtt.c 1 Jun 2021 13:18:33 -0000 > @@ -354,8 +354,10 @@ static struct i915_vma *pd_vma_create(st > GEM_BUG_ON(size > ggtt->vm.total); > > vma = i915_vma_alloc(); > - if (!vma) > + if (!vma) { > + printf("%s i915_vma_alloc() returned NULL\n", __func__); > return ERR_PTR(-ENOMEM); > + } > > i915_active_init(&vma->active, NULL, NULL); > > @@ -401,8 +403,13 @@ int gen6_ppgtt_pin(struct i915_ppgtt *ba > * size. We allocate at the top of the GTT to avoid fragmentation. > */ > err = 0; > - if (!atomic_read(&ppgtt->pin_count)) > + if (!atomic_read(&ppgtt->pin_count)) { > + printf("gen6 pad1 0 %x\n", ppgtt->pad1[0]); > + printf("pad1 4095 %x\n", ppgtt->pad1[4095]); > + printf("pad2 0 %x\n", ppgtt->pad2[0]); > + printf("pad2 4095 %x\n", ppgtt->pad2[4095]); > err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH); > + } > if (!err) > atomic_inc(&ppgtt->pin_count); > mutex_unlock(&ppgtt->pin_mutex); > Index: sys/dev/pci/drm/i915/gt/gen6_ppgtt.h > =================================================================== > RCS file: /cvs/src/sys/dev/pci/drm/i915/gt/gen6_ppgtt.h,v > retrieving revision 1.1 > diff -u -p -r1.1 gen6_ppgtt.h > --- sys/dev/pci/drm/i915/gt/gen6_ppgtt.h 8 Jun 2020 04:48:13 -0000 > 1.1 > +++ sys/dev/pci/drm/i915/gt/gen6_ppgtt.h 1 Jun 2021 12:59:09 -0000 > @@ -12,7 +12,9 @@ struct gen6_ppgtt { > struct i915_ppgtt base; > > struct rwlock flush; > + unsigned int pad1[PAGE_SIZE]; > struct i915_vma *vma; > + unsigned int pad2[PAGE_SIZE]; > gen6_pte_t __iomem *pd_addr; > > atomic_t pin_count; > Index: sys/dev/pci/drm/i915/gt/intel_ggtt.c > =================================================================== > RCS file: /cvs/src/sys/dev/pci/drm/i915/gt/intel_ggtt.c,v > retrieving revision 1.1 > diff -u -p -r1.1 intel_ggtt.c > --- sys/dev/pci/drm/i915/gt/intel_ggtt.c 8 Jun 2020 04:48:13 -0000 > 1.1 > +++ sys/dev/pci/drm/i915/gt/intel_ggtt.c 1 Jun 2021 10:44:43 -0000 > @@ -699,8 +699,11 @@ int i915_init_ggtt(struct drm_i915_priva > > if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) { > ret = init_aliasing_ppgtt(&i915->ggtt); > - if (ret) > + if (ret) { > + printf("\n%s init_aliasing_ppgtt() returned %d\n", > + __func__, ret); > cleanup_init_ggtt(&i915->ggtt); > + } > } > > return 0; >