After applying the diffs you sent on 2021-05-17 and 2021-05-27, I booted the new kernel, which came up all the way to the login prompt. Along the way I got this:
... scsibus2 at vscsi0: 256 targets softraid0 at root scsibus3 at softraid0: 256 targets root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b i915_ggtt_pin called with NULL vma WARNING !list_empty(&dev->mode_config.connector_list) failed at /usr/src/sys/dev/pci/drm/drm_mode_config.c:487 [drm] *ERROR* connector DP-2 leaked! drm : drm_WARN_ON(d->wake_count)drm : drm_WARN_ON(d->wake_count)Device initialization failed (-22) WARNING ({ typeof(vblank->enabled) __tmp = *(volatile typeof(vblank->enabled) *)&(vblank->enabled); membar_datadep_consumer(); __tmp; }) && drm_core_check_feature(dev, DRIVER_MODESET) failed at /usr/src/sys/dev/pci/drm/drm_vblank.c:440 Automatic boot in progress: starting file system checks. /dev/sd0a (5e1040cb2dc494f4.a): file system is clean; not checking ... Then I rebooted a few times without problems. Then, this happened: ... scsibus2 at vscsi0: 256 targets softraid0 at root scsibus3 at softraid0: 256 targets root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b uvm_fault(0xffffffff8218aa20, 0xb9, 0, 1) -> e kernel: page fault trap, code=0 Stopped at i915_ggtt_pin+0x31: movq 0xb8(%rdi),%r12 ddb{0}> trace i915_ggtt_pin(1,10000,20) at i915_ggtt_pin+0x31 gen6_ppgtt_pin(ffff80000080f000) at gen6_ppgtt_pin+0x7c __intel_context_do_pin(fffffd817adb6d80) at __intel_context_do_pin+0xca intel_engines_init(ffff800000104c38) at intel_engines_init+0x4b5 intel_gt_init(ffff800000104c38) at intel_gt_init+0x130 i915_gem_init(ffff800000100000) at i915_gem_init+0xa3 i915_driver_probe(ffff800000100000,ffffffff8207c330) at i915_driver_probe+0x7ed inteldrm_attachhook(ffff800000100000) at inteldrm_attachhook+0x43 config_process_deferred_mountroot() at config_process_deferred_mountroot+0x6b main(0) at main+0x733 end trace frame: 0x0, count: -10 ddb{0}> mach ddbcpu 1 Stopped at x86_ipi_db+0x12: leave ddb{1}> trace x86_ipi_db(ffff80001ff39ff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 
pagezero() at pagezero+0x1d end trace frame: 0x0, count: -4 ddb{1}> mach ddbcpu 2 Stopped at x86_ipi_db+0x12: leave ddb{2}> trace x86_ipi_db(ffff80001ff42ff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 acpicpu_idle() at acpicpu_idle+0x1ea sched_idle(ffff80001ff42ff0) at sched_idle+0x27e end trace frame: 0x0, count: -5 ddb{2}> mach ddbcpu 3 Stopped at x86_ipi_db+0x12: leave ddb{3}> trace x86_ipi_db(ffff80001ff4bff0) at x86_ipi_db+0x12 x86_ipi_handler() at x86_ipi_handler+0x80 Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23 acpicpu_idle() at acpicpu_idle+0x1ea sched_idle(ffff80001ff4bff0) at sched_idle+0x27e end trace frame: 0x0, count: -5 ddb{3}> mach ddbcpu 0 Stopped at i915_ggtt_pin+0x31: movq 0xb8(%rdi),%r12 ddb{0}> ps PID TID PPID UID S FLAGS WAIT COMMAND 96492 458495 0 0 3 0x14200 bored i915-userptr-acq 47107 142704 0 0 3 0x14200 bored i915_flip 78707 150564 0 0 3 0x14200 bored i915_modeset 65142 186494 0 0 3 0x14200 bored i915-dp 40747 202062 0 0 3 0x14200 bored i915 95885 520940 0 0 3 0x14200 bored smr 53549 455821 0 0 7 0x14200 zerothread 9763 62961 0 0 3 0x14200 aiodoned aiodoned 16393 46637 0 0 2 0x14600 update 96165 57862 0 0 3 0x14200 cleaner cleaner 50668 400747 0 0 3 0x14200 reaper reaper 22827 65470 0 0 3 0x14200 pgdaemon pagedaemon 74791 354628 0 0 3 0x14200 bored crynlk 74563 29562 0 0 3 0x14200 bored crypto 3420 208609 0 0 3 0x14200 usbtsk usbtask 52920 137514 0 0 3 0x14200 usbatsk usbatsk 81094 143259 0 0 3 0x14200 bored drmtskl 67467 377341 0 0 3 0x14200 bored drmlwq 42508 320137 0 0 3 0x14200 bored drmlwq 56022 226224 0 0 3 0x14200 bored drmlwq 67137 287154 0 0 3 0x14200 bored drmlwq 94635 448729 0 0 3 0x14200 bored drmubwq 92127 359644 0 0 3 0x14200 bored drmubwq 88871 457027 0 0 3 0x14200 bored drmubwq 59563 193255 0 0 3 0x14200 bored drmubwq 95387 118220 0 0 3 0x14200 bored drmhpwq 42179 204898 0 0 3 0x14200 bored drmhpwq 33442 342703 0 0 3 0x14200 bored drmhpwq 35835 31321 0 0 3 
0x14200 bored drmhpwq 93138 477536 0 0 3 0x14200 bored drmwq 94032 411421 0 0 3 0x14200 bored drmwq 99840 206532 0 0 3 0x14200 bored drmwq 98846 375842 0 0 3 0x14200 bored drmwq 48774 506278 0 0 2 0x40014200 acpi0 70133 127243 0 0 7 0x40014200 idle3 77559 343912 0 0 7 0x40014200 idle2 92051 177112 0 0 1 0x14200 idle1 98224 121451 0 0 2 0x14200 sensors 38426 419300 0 0 3 0x14200 bored softnet 81391 515783 0 0 3 0x14200 bored systqmp 12371 202579 0 0 3 0x14200 bored systq 74071 217005 0 0 2 0x40014200 softclock 15684 18336 0 0 3 0x40014200 idle0 1 231868 0 0 3 0 initexec swapper * 0 0 -1 0 7 0x10200 swapper ddb{0}> show uvm Current UVM status: pagesize=4096 (0x1000), pagemask=0xfff, pageshift=12 971040 VM pages: 0 active, 0 inactive, 20 wired, 963031 free (19081 zero) min 10% (25) anon, 10% (25) vnode, 5% (12) vtext freemin=32368, free-target=43157, inactive-target=0, wired-max=323680 faults=1, traps=3, intrs=8601, ctxswitch=168 fpuswitch=0 softint=2869, syscalls=0, kmapent=12 fault counts: noram=0, noanon=0, noamap=0, pgwait=0, pgrele=0 ok relocks(total)=0(0), anget(retries)=0(0), amapcopy=0 neighbor anon/obj pg=0/0, gets(lock/unlock)=0/0 cases: anon=0, anoncow=0, obj=0, prcopy=0, przero=0 daemon and swap counts: woke=0, revs=0, scans=0, obscans=0, anscans=0 busy=0, freed=0, reactivate=0, deactivate=0 pageouts=0, pending=0, nswget=0 nswapdev=1 swpages=1068660, swpginuse=0, swpgonly=0 paging=0 kernel pointers: objs(kern)=0xffffffff82160b58 ddb{0}> show bcstats Current Buffer Cache status: numbufs 2 busymapped 0, delwri 0 kvaslots 5964 avail kva slots 5964 bufpages 5, dmapages 5, dirtypages 0 pendingreads 0, pendingwrites 0 highflips 0, highflops 0, dmaflips 0 ddb{0}> On the next reboot, the system booted up with this: ... 
scsibus2 at vscsi0: 256 targets softraid0 at root scsibus3 at softraid0: 256 targets root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b i915_ggtt_pin called with NULL vma WARNING !list_empty(&dev->mode_config.connector_list) failed at /usr/src/sys/dev/pci/drm/drm_mode_config.c:487 [drm] *ERROR* connector DP-2 leaked! drm : drm_WARN_ON(d->wake_count)drm : drm_WARN_ON(d->wake_count)Device initialization failed (-22) WARNING ({ typeof(vblank->enabled) __tmp = *(volatile typeof(vblank->enabled) *)&(vblank->enabled); membar_datadep_consumer(); __tmp; }) && drm_core_check_feature(dev, DRIVER_MODESET) failed at /usr/src/sys/dev/pci/drm/drm_vblank.c:440 Automatic boot in progress: starting file system checks. /dev/sd0a (5e1040cb2dc494f4.a): file system is clean; not checking ... Next reboot did not complete. The boot process stopped here: ... scsibus2 at vscsi0: 256 targets softraid0 at root scsibus3 at softraid0: 256 targets root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b I hope this helps to narrow down the issue. On 2021-05-27 08:41, Jonathan Gray wrote: > On Mon, May 17, 2021 at 05:35:02PM +1000, Jonathan Gray wrote: >> On Tue, May 04, 2021 at 03:44:54PM +0200, m...@fn.de wrote: >>> Thanks for the quick help. I built a kernel with your fix. >>> The system is booting up with a warning, now. >>> >>> ... >>> scsibus3 at softraid0: 256 targets >>> sd2 at scsibus3 targ 1 lun 0: <OPENBSD, SR RAID 1, 006> >>> sd2: 122103MB, 512 bytes/sector, 250067198 sectors >>> root on sd2a (da12fadb67cf7a4d.a) swap on sd2b dump on sd2b >>> drm : drm_WARN_ON(d->wake_count)drm : drm_WARN_ON(d->wake_count)Device >>> initialization failed (-22) >>> Automatic boot in progress: starting file system checks. >>> /dev/sd2a (da12fadb67cf7a4d.a): file system is clean; not checking >>> ... >> >> Thanks, can you try this? > > And this diff with commits backported to -current related to vma/pinning. 
> > drm/i915/gt: Prevent use of engine->wa_ctx after error > drm/i915: Fix mismatch between misplaced vma check and vma insert > drm/i915: Hold onto an explicit ref to i915_vma_work.pinned > drm/i915: Use the active reference on the vma while capturing > > diff --git sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c > sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c > index 971ed84f371..993c2b22f9f 100644 > --- sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c > +++ sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c > @@ -365,7 +365,7 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 > *entry, > return true; > > if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) && > - (vma->node.start + vma->node.size - 1) >> 32) > + (vma->node.start + vma->node.size + 4095) >> 32) > return true; > > if (flags & __EXEC_OBJECT_NEEDS_MAP && > diff --git sys/dev/pci/drm/i915/gt/intel_lrc.c > sys/dev/pci/drm/i915/gt/intel_lrc.c > index ac8eade748b..9bdb964d14f 100644 > --- sys/dev/pci/drm/i915/gt/intel_lrc.c > +++ sys/dev/pci/drm/i915/gt/intel_lrc.c > @@ -3462,6 +3462,9 @@ err: > static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine) > { > i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); > + > + /* Called on error unwind, clear all flags to prevent further use */ > + memset(&engine->wa_ctx, 0, sizeof(engine->wa_ctx)); > } > > typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); > diff --git sys/dev/pci/drm/i915/i915_gpu_error.c > sys/dev/pci/drm/i915/i915_gpu_error.c > index 9d02829f8df..72e25f3d014 100644 > --- sys/dev/pci/drm/i915/i915_gpu_error.c > +++ sys/dev/pci/drm/i915/i915_gpu_error.c > @@ -1346,7 +1346,7 @@ capture_vma(struct intel_engine_capture_vma *next, > } > > strlcpy(c->name, name, sizeof(c->name)); > - c->vma = i915_vma_get(vma); > + c->vma = vma; /* reference held while active */ > > c->next = next; > return c; > @@ -1456,7 +1456,6 @@ intel_engine_coredump_add_vma(struct > intel_engine_coredump *ee, > compress)); > > 
i915_active_release(&vma->active); > - i915_vma_put(vma); > > capture = this->next; > kfree(this); > diff --git sys/dev/pci/drm/i915/i915_vma.c sys/dev/pci/drm/i915/i915_vma.c > index 2bf2292ae31..8aca774266c 100644 > --- sys/dev/pci/drm/i915/i915_vma.c > +++ sys/dev/pci/drm/i915/i915_vma.c > @@ -331,8 +331,10 @@ static void __vma_release(struct dma_fence_work *work) > { > struct i915_vma_work *vw = container_of(work, typeof(*vw), base); > > - if (vw->pinned) > + if (vw->pinned) { > __i915_gem_object_unpin_pages(vw->pinned); > + i915_gem_object_put(vw->pinned); > + } > } > > static const struct dma_fence_work_ops bind_ops = { > @@ -448,7 +450,7 @@ int i915_vma_bind(struct i915_vma *vma, > > if (vma->obj) { > __i915_gem_object_pin_pages(vma->obj); > - work->pinned = vma->obj; > + work->pinned = i915_gem_object_get(vma->obj); > } > } else { > ret = vma->ops->bind_vma(vma, cache_level, bind_flags); >