After applying the diffs you sent on 2021-05-17 and 2021-05-27, I
booted the new kernel, which got as far as the login prompt. Along
the way I got this:

...
scsibus2 at vscsi0: 256 targets
softraid0 at root
scsibus3 at softraid0: 256 targets
root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b

i915_ggtt_pin called with NULL vma
WARNING !list_empty(&dev->mode_config.connector_list) failed at 
/usr/src/sys/dev/pci/drm/drm_mode_config.c:487
[drm] *ERROR* connector DP-2 leaked!
drm : drm_WARN_ON(d->wake_count)drm : drm_WARN_ON(d->wake_count)Device 
initialization failed (-22)
WARNING ({ typeof(vblank->enabled) __tmp = *(volatile typeof(vblank->enabled) 
*)&(vblank->enabled); membar_datadep_consumer(); __tmp; }) && 
drm_core_check_feature(dev, DRIVER_MODESET) failed at 
/usr/src/sys/dev/pci/drm/drm_vblank.c:440
Automatic boot in progress: starting file system checks.
/dev/sd0a (5e1040cb2dc494f4.a): file system is clean; not checking
...


I then rebooted a few times without problems. After that, one boot ended in a page fault and dropped into ddb:

...
scsibus2 at vscsi0: 256 targets
softraid0 at root
scsibus3 at softraid0: 256 targets
root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b
uvm_fault(0xffffffff8218aa20, 0xb9, 0, 1) -> e
kernel: page fault trap, code=0
Stopped at      i915_ggtt_pin+0x31:     movq    0xb8(%rdi),%r12
ddb{0}> trace
i915_ggtt_pin(1,10000,20) at i915_ggtt_pin+0x31
gen6_ppgtt_pin(ffff80000080f000) at gen6_ppgtt_pin+0x7c
__intel_context_do_pin(fffffd817adb6d80) at __intel_context_do_pin+0xca
intel_engines_init(ffff800000104c38) at intel_engines_init+0x4b5
intel_gt_init(ffff800000104c38) at intel_gt_init+0x130
i915_gem_init(ffff800000100000) at i915_gem_init+0xa3
i915_driver_probe(ffff800000100000,ffffffff8207c330) at i915_driver_probe+0x7ed

inteldrm_attachhook(ffff800000100000) at inteldrm_attachhook+0x43
config_process_deferred_mountroot() at config_process_deferred_mountroot+0x6b
main(0) at main+0x733
end trace frame: 0x0, count: -10
ddb{0}> mach ddbcpu 1
Stopped at      x86_ipi_db+0x12:        leave
ddb{1}> trace
x86_ipi_db(ffff80001ff39ff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
pagezero() at pagezero+0x1d
end trace frame: 0x0, count: -4
ddb{1}> mach ddbcpu 2
Stopped at      x86_ipi_db+0x12:        leave
ddb{2}> trace
x86_ipi_db(ffff80001ff42ff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
acpicpu_idle() at acpicpu_idle+0x1ea
sched_idle(ffff80001ff42ff0) at sched_idle+0x27e
end trace frame: 0x0, count: -5
ddb{2}> mach ddbcpu 3
Stopped at      x86_ipi_db+0x12:        leave
ddb{3}> trace
x86_ipi_db(ffff80001ff4bff0) at x86_ipi_db+0x12
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x23
acpicpu_idle() at acpicpu_idle+0x1ea
sched_idle(ffff80001ff4bff0) at sched_idle+0x27e
end trace frame: 0x0, count: -5
ddb{3}> mach ddbcpu 0
Stopped at      i915_ggtt_pin+0x31:     movq    0xb8(%rdi),%r12
ddb{0}> ps
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
 96492  458495      0      0  3     0x14200  bored         i915-userptr-acq
 47107  142704      0      0  3     0x14200  bored         i915_flip
 78707  150564      0      0  3     0x14200  bored         i915_modeset
 65142  186494      0      0  3     0x14200  bored         i915-dp
 40747  202062      0      0  3     0x14200  bored         i915
 95885  520940      0      0  3     0x14200  bored         smr
 53549  455821      0      0  7     0x14200                zerothread
  9763   62961      0      0  3     0x14200  aiodoned      aiodoned
 16393   46637      0      0  2     0x14600                update
 96165   57862      0      0  3     0x14200  cleaner       cleaner
 50668  400747      0      0  3     0x14200  reaper        reaper
 22827   65470      0      0  3     0x14200  pgdaemon      pagedaemon
 74791  354628      0      0  3     0x14200  bored         crynlk
 74563   29562      0      0  3     0x14200  bored         crypto
  3420  208609      0      0  3     0x14200  usbtsk        usbtask
 52920  137514      0      0  3     0x14200  usbatsk       usbatsk
 81094  143259      0      0  3     0x14200  bored         drmtskl
 67467  377341      0      0  3     0x14200  bored         drmlwq
 42508  320137      0      0  3     0x14200  bored         drmlwq
 56022  226224      0      0  3     0x14200  bored         drmlwq
 67137  287154      0      0  3     0x14200  bored         drmlwq
 94635  448729      0      0  3     0x14200  bored         drmubwq
 92127  359644      0      0  3     0x14200  bored         drmubwq
 88871  457027      0      0  3     0x14200  bored         drmubwq
 59563  193255      0      0  3     0x14200  bored         drmubwq
 95387  118220      0      0  3     0x14200  bored         drmhpwq
 42179  204898      0      0  3     0x14200  bored         drmhpwq
 33442  342703      0      0  3     0x14200  bored         drmhpwq
 35835   31321      0      0  3     0x14200  bored         drmhpwq
 93138  477536      0      0  3     0x14200  bored         drmwq
 94032  411421      0      0  3     0x14200  bored         drmwq
 99840  206532      0      0  3     0x14200  bored         drmwq
 98846  375842      0      0  3     0x14200  bored         drmwq
 48774  506278      0      0  2  0x40014200                acpi0
 70133  127243      0      0  7  0x40014200                idle3
 77559  343912      0      0  7  0x40014200                idle2
 92051  177112      0      0  1     0x14200                idle1
 98224  121451      0      0  2     0x14200                sensors
 38426  419300      0      0  3     0x14200  bored         softnet
 81391  515783      0      0  3     0x14200  bored         systqmp
 12371  202579      0      0  3     0x14200  bored         systq
 74071  217005      0      0  2  0x40014200                softclock
 15684   18336      0      0  3  0x40014200                idle0
     1  231868      0      0  3           0  initexec      swapper
*    0       0     -1      0  7     0x10200                swapper
ddb{0}> show uvm
Current UVM status:
  pagesize=4096 (0x1000), pagemask=0xfff, pageshift=12
  971040 VM pages: 0 active, 0 inactive, 20 wired, 963031 free (19081 zero)
  min  10% (25) anon, 10% (25) vnode, 5% (12) vtext
  freemin=32368, free-target=43157, inactive-target=0, wired-max=323680
  faults=1, traps=3, intrs=8601, ctxswitch=168 fpuswitch=0
  softint=2869, syscalls=0, kmapent=12
  fault counts:
    noram=0, noanon=0, noamap=0, pgwait=0, pgrele=0
    ok relocks(total)=0(0), anget(retries)=0(0), amapcopy=0
    neighbor anon/obj pg=0/0, gets(lock/unlock)=0/0
    cases: anon=0, anoncow=0, obj=0, prcopy=0, przero=0
  daemon and swap counts:
    woke=0, revs=0, scans=0, obscans=0, anscans=0
    busy=0, freed=0, reactivate=0, deactivate=0
    pageouts=0, pending=0, nswget=0
    nswapdev=1
    swpages=1068660, swpginuse=0, swpgonly=0 paging=0
  kernel pointers:
    objs(kern)=0xffffffff82160b58
ddb{0}> show bcstats
Current Buffer Cache status:
numbufs 2 busymapped 0, delwri 0
kvaslots 5964 avail kva slots 5964
bufpages 5, dmapages 5, dirtypages 0
pendingreads 0, pendingwrites 0
highflips 0, highflops 0, dmaflips 0
ddb{0}>


On the next reboot, the system booted up with this:

...
scsibus2 at vscsi0: 256 targets
softraid0 at root
scsibus3 at softraid0: 256 targets
root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b

i915_ggtt_pin called with NULL vma
WARNING !list_empty(&dev->mode_config.connector_list) failed at 
/usr/src/sys/dev/pci/drm/drm_mode_config.c:487
[drm] *ERROR* connector DP-2 leaked!
drm : drm_WARN_ON(d->wake_count)drm : drm_WARN_ON(d->wake_count)Device 
initialization failed (-22)
WARNING ({ typeof(vblank->enabled) __tmp = *(volatile typeof(vblank->enabled) 
*)&(vblank->enabled); membar_datadep_consumer(); __tmp; }) && 
drm_core_check_feature(dev, DRIVER_MODESET) failed at 
/usr/src/sys/dev/pci/drm/drm_vblank.c:440
Automatic boot in progress: starting file system checks.
/dev/sd0a (5e1040cb2dc494f4.a): file system is clean; not checking
...


The next reboot did not complete. The boot process stopped here:

...
scsibus2 at vscsi0: 256 targets
softraid0 at root
scsibus3 at softraid0: 256 targets
root on sd0a (5e1040cb2dc494f4.a) swap on sd0b dump on sd0b



I hope this helps to narrow down the issue.


On 2021-05-27 08:41, Jonathan Gray wrote:
> On Mon, May 17, 2021 at 05:35:02PM +1000, Jonathan Gray wrote:
>> On Tue, May 04, 2021 at 03:44:54PM +0200, m...@fn.de wrote:
>>> Thanks for the quick help. I built a kernel with your fix.
>>> The system is booting up with a warning, now.
>>>
>>> ...
>>> scsibus3 at softraid0: 256 targets
>>> sd2 at scsibus3 targ 1 lun 0: <OPENBSD, SR RAID 1, 006>
>>> sd2: 122103MB, 512 bytes/sector, 250067198 sectors
>>> root on sd2a (da12fadb67cf7a4d.a) swap on sd2b dump on sd2b
>>> drm : drm_WARN_ON(d->wake_count)drm : drm_WARN_ON(d->wake_count)Device 
>>> initialization failed (-22)
>>> Automatic boot in progress: starting file system checks.
>>> /dev/sd2a (da12fadb67cf7a4d.a): file system is clean; not checking
>>> ...
>>
>> Thanks, can you try this?
> 
> And this diff with commits backported to -current related to vma/pinning.
> 
> drm/i915/gt: Prevent use of engine->wa_ctx after error
> drm/i915: Fix mismatch between misplaced vma check and vma insert
> drm/i915: Hold onto an explicit ref to i915_vma_work.pinned
> drm/i915: Use the active reference on the vma while capturing
> 
> diff --git sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c 
> sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c
> index 971ed84f371..993c2b22f9f 100644
> --- sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c
> +++ sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c
> @@ -365,7 +365,7 @@ eb_vma_misplaced(const struct drm_i915_gem_exec_object2 
> *entry,
>               return true;
>  
>       if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
> -         (vma->node.start + vma->node.size - 1) >> 32)
> +         (vma->node.start + vma->node.size + 4095) >> 32)
>               return true;
>  
>       if (flags & __EXEC_OBJECT_NEEDS_MAP &&
> diff --git sys/dev/pci/drm/i915/gt/intel_lrc.c 
> sys/dev/pci/drm/i915/gt/intel_lrc.c
> index ac8eade748b..9bdb964d14f 100644
> --- sys/dev/pci/drm/i915/gt/intel_lrc.c
> +++ sys/dev/pci/drm/i915/gt/intel_lrc.c
> @@ -3462,6 +3462,9 @@ err:
>  static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
>  {
>       i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
> +
> +     /* Called on error unwind, clear all flags to prevent further use */
> +     memset(&engine->wa_ctx, 0, sizeof(engine->wa_ctx));
>  }
>  
>  typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
> diff --git sys/dev/pci/drm/i915/i915_gpu_error.c 
> sys/dev/pci/drm/i915/i915_gpu_error.c
> index 9d02829f8df..72e25f3d014 100644
> --- sys/dev/pci/drm/i915/i915_gpu_error.c
> +++ sys/dev/pci/drm/i915/i915_gpu_error.c
> @@ -1346,7 +1346,7 @@ capture_vma(struct intel_engine_capture_vma *next,
>       }
>  
>       strlcpy(c->name, name, sizeof(c->name));
> -     c->vma = i915_vma_get(vma);
> +     c->vma = vma; /* reference held while active */
>  
>       c->next = next;
>       return c;
> @@ -1456,7 +1456,6 @@ intel_engine_coredump_add_vma(struct 
> intel_engine_coredump *ee,
>                                                compress));
>  
>               i915_active_release(&vma->active);
> -             i915_vma_put(vma);
>  
>               capture = this->next;
>               kfree(this);
> diff --git sys/dev/pci/drm/i915/i915_vma.c sys/dev/pci/drm/i915/i915_vma.c
> index 2bf2292ae31..8aca774266c 100644
> --- sys/dev/pci/drm/i915/i915_vma.c
> +++ sys/dev/pci/drm/i915/i915_vma.c
> @@ -331,8 +331,10 @@ static void __vma_release(struct dma_fence_work *work)
>  {
>       struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
>  
> -     if (vw->pinned)
> +     if (vw->pinned) {
>               __i915_gem_object_unpin_pages(vw->pinned);
> +             i915_gem_object_put(vw->pinned);
> +     }
>  }
>  
>  static const struct dma_fence_work_ops bind_ops = {
> @@ -448,7 +450,7 @@ int i915_vma_bind(struct i915_vma *vma,
>  
>               if (vma->obj) {
>                       __i915_gem_object_pin_pages(vma->obj);
> -                     work->pinned = vma->obj;
> +                     work->pinned = i915_gem_object_get(vma->obj);
>               }
>       } else {
>               ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
> 

Reply via email to