On Mon, Jul 07, 2025 at 08:29:47AM +0100, Laurence Tratt wrote:
>   n_fault_status *ERROR*       Faulty UTCL2 client ID: CB/DB (0x0)

The 6.12.36 diff below has quite a lot of changes; does it help?

'drm/amdgpu: Fix SDMA UTC_L1 handling during start/stop sequences'
at first glance seems related, but may only be for MI300 cards.

The scheduler changes mention fixing 'hanging indefinitely while waiting'.

725a59d29adb drm/amdkfd: Fix instruction hazard in gfx12 trap handler
6c7dc7ad867c drm/amdkfd: remove gfx 12 trap handler page size cap
0d087de947ba drm/fbdev-dma: Add shadow buffering for deferred I/O
8cd7ee9cd7de drm/amd/display: Fix mpv playback corruption on weston
5f2e040f19c4 drm/amdgpu: switch job hw_fence to amdgpu_fence
9cfa2fea2566 drm/amdgpu: Fix SDMA UTC_L1 handling during start/stop sequences
cc0a3fd781bf drm/i915/dsi: Fix off by one in BXT_MIPI_TRANS_VTOTAL
e881b82f5d3d drm/amd/display: Check dce_hwseq before dereferencing it
593517e5561c drm/amdgpu: Add kicker device detection
ba1ffc32bda7 drm/amd/display: Fix RMCM programming seq errors
ca8efc6a8971 drm/amd/display: Correct non-OLED pre_T11_delay.
e2c3133ff4d5 drm/amdgpu: amdgpu_vram_mgr_new(): Clamp lpfn to total vram
4ce9f2dc9ff7 drm/amd/display: Add null pointer check for get_first_active_display()
b681e2a8a759 drm/amdkfd: Fix race in GWS queue scheduling
18ec560e3e9b drm/i915: fix build error some more
23116bf9a3d0 drm/amd: Adjust output for discovery error handling
840fe792a170 drm/amdgpu/discovery: optionally use fw based ip discovery
7952c4bb5092 Revert "drm/i915/gem: Allow EXEC_CAPTURE on recoverable contexts on DG1"
5b930e72aaea drm/i915/gem: Allow EXEC_CAPTURE on recoverable contexts on DG1
aefd0a935625 drm/scheduler: signal scheduled fence when kill job
777580609d57 drm/amdgpu: seq64 memory unmap uses uninterruptible lock
861204dce6ed amd/amdkfd: fix a kfd_process ref leak

commit messages at:
https://cdn.kernel.org/pub/linux/kernel/v6.x/ChangeLog-6.12.36

diff --git sys/dev/pci/drm/amd/amdgpu/amdgpu_debugfs.c 
sys/dev/pci/drm/amd/amdgpu/amdgpu_debugfs.c
index 9da4414de61..81f16e4447f 100644
--- sys/dev/pci/drm/amd/amdgpu/amdgpu_debugfs.c
+++ sys/dev/pci/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1902,7 +1902,7 @@ no_preempt:
                        continue;
                }
                job = to_amdgpu_job(s_job);
-               if (preempted && (&job->hw_fence) == fence)
+               if (preempted && (&job->hw_fence.base) == fence)
                        /* mark the job as preempted */
                        job->preemption_status |= AMDGPU_IB_PREEMPTED;
        }
diff --git sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c 
sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c
index 0eb919a0fe6..c4a899194dc 100644
--- sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c
+++ sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c
@@ -6006,7 +6006,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
         *
         * job->base holds a reference to parent fence
         */
-       if (job && dma_fence_is_signaled(&job->hw_fence)) {
+       if (job && dma_fence_is_signaled(&job->hw_fence.base)) {
                job_signaled = true;
                dev_info(adev->dev, "Guilty job already signaled, skipping HW 
reset");
                goto skip_hw_reset;
diff --git sys/dev/pci/drm/amd/amdgpu/amdgpu_discovery.c 
sys/dev/pci/drm/amd/amdgpu/amdgpu_discovery.c
index 32c9bed4d21..8343abbddce 100644
--- sys/dev/pci/drm/amd/amdgpu/amdgpu_discovery.c
+++ sys/dev/pci/drm/amd/amdgpu/amdgpu_discovery.c
@@ -305,10 +305,12 @@ static int amdgpu_discovery_read_binary_from_file(struct 
amdgpu_device *adev,
        const struct firmware *fw;
        int r;
 
-       r = request_firmware(&fw, fw_name, adev->dev);
+       r = firmware_request_nowarn(&fw, fw_name, adev->dev);
        if (r) {
-               dev_err(adev->dev, "can't load firmware \"%s\"\n",
-                       fw_name);
+               if (amdgpu_discovery == 2)
+                       dev_err(adev->dev, "can't load firmware \"%s\"\n", 
fw_name);
+               else
+                       drm_info(&adev->ddev, "Optional firmware \"%s\" was not 
found\n", fw_name);
                return r;
        }
 
@@ -423,16 +425,12 @@ static int amdgpu_discovery_init(struct amdgpu_device 
*adev)
        /* Read from file if it is the preferred option */
        fw_name = amdgpu_discovery_get_fw_name(adev);
        if (fw_name != NULL) {
-               dev_info(adev->dev, "use ip discovery information from file");
+               drm_dbg(&adev->ddev, "use ip discovery information from file");
                r = amdgpu_discovery_read_binary_from_file(adev, 
adev->mman.discovery_bin, fw_name);
-
-               if (r) {
-                       dev_err(adev->dev, "failed to read ip discovery binary 
from file\n");
-                       r = -EINVAL;
+               if (r)
                        goto out;
-               }
-
        } else {
+               drm_dbg(&adev->ddev, "use ip discovery information from 
memory");
                r = amdgpu_discovery_read_binary_from_mem(
                        adev, adev->mman.discovery_bin);
                if (r)
@@ -1303,10 +1301,8 @@ static int amdgpu_discovery_reg_base_init(struct 
amdgpu_device *adev)
        int r;
 
        r = amdgpu_discovery_init(adev);
-       if (r) {
-               DRM_ERROR("amdgpu_discovery_init failed\n");
+       if (r)
                return r;
-       }
 
        adev->gfx.xcc_mask = 0;
        adev->sdma.sdma_mask = 0;
@@ -2446,6 +2442,40 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device 
*adev)
 {
        int r;
 
+       switch (adev->asic_type) {
+       case CHIP_VEGA10:
+       case CHIP_VEGA12:
+       case CHIP_RAVEN:
+       case CHIP_VEGA20:
+       case CHIP_ARCTURUS:
+       case CHIP_ALDEBARAN:
+               /* this is not fatal.  We have a fallback below
+                * if the new firmwares are not present. some of
+                * this will be overridden below to keep things
+                * consistent with the current behavior.
+                */
+               r = amdgpu_discovery_reg_base_init(adev);
+               if (!r) {
+                       amdgpu_discovery_harvest_ip(adev);
+                       amdgpu_discovery_get_gfx_info(adev);
+                       amdgpu_discovery_get_mall_info(adev);
+                       amdgpu_discovery_get_vcn_info(adev);
+               }
+               break;
+       default:
+               r = amdgpu_discovery_reg_base_init(adev);
+               if (r) {
+                       drm_err(&adev->ddev, "discovery failed: %d\n", r);
+                       return r;
+               }
+
+               amdgpu_discovery_harvest_ip(adev);
+               amdgpu_discovery_get_gfx_info(adev);
+               amdgpu_discovery_get_mall_info(adev);
+               amdgpu_discovery_get_vcn_info(adev);
+               break;
+       }
+
        switch (adev->asic_type) {
        case CHIP_VEGA10:
                vega10_reg_base_init(adev);
@@ -2608,14 +2638,6 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device 
*adev)
                adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
                break;
        default:
-               r = amdgpu_discovery_reg_base_init(adev);
-               if (r)
-                       return -EINVAL;
-
-               amdgpu_discovery_harvest_ip(adev);
-               amdgpu_discovery_get_gfx_info(adev);
-               amdgpu_discovery_get_mall_info(adev);
-               amdgpu_discovery_get_vcn_info(adev);
                break;
        }
 
diff --git sys/dev/pci/drm/amd/amdgpu/amdgpu_fence.c 
sys/dev/pci/drm/amd/amdgpu/amdgpu_fence.c
index 8cfb0eef1fe..f66053e2b6a 100644
--- sys/dev/pci/drm/amd/amdgpu/amdgpu_fence.c
+++ sys/dev/pci/drm/amd/amdgpu/amdgpu_fence.c
@@ -41,22 +41,6 @@
 #include "amdgpu_trace.h"
 #include "amdgpu_reset.h"
 
-/*
- * Fences mark an event in the GPUs pipeline and are used
- * for GPU/CPU synchronization.  When the fence is written,
- * it is expected that all buffers associated with that fence
- * are no longer in use by the associated ring on the GPU and
- * that the relevant GPU caches have been flushed.
- */
-
-struct amdgpu_fence {
-       struct dma_fence base;
-
-       /* RB, DMA, etc. */
-       struct amdgpu_ring              *ring;
-       ktime_t                         start_timestamp;
-};
-
 static struct pool amdgpu_fence_slab;
 
 int amdgpu_fence_slab_init(void)
@@ -164,12 +148,12 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f, struct amd
 #endif
                if (am_fence == NULL)
                        return -ENOMEM;
-               fence = &am_fence->base;
-               am_fence->ring = ring;
        } else {
                /* take use of job-embedded fence */
-               fence = &job->hw_fence;
+               am_fence = &job->hw_fence;
        }
+       fence = &am_fence->base;
+       am_fence->ring = ring;
 
        seq = ++ring->fence_drv.sync_seq;
        if (job && job->job_run_counter) {
@@ -738,7 +722,7 @@ void amdgpu_fence_driver_clear_job_fences(struct 
amdgpu_ring *ring)
                         * it right here or we won't be able to track them in 
fence_drv
                         * and they will remain unsignaled during sa_bo free.
                         */
-                       job = container_of(old, struct amdgpu_job, hw_fence);
+                       job = container_of(old, struct amdgpu_job, 
hw_fence.base);
                        if (!job->base.s_fence && !dma_fence_is_signaled(old))
                                dma_fence_signal(old);
                        RCU_INIT_POINTER(*ptr, NULL);
@@ -800,7 +784,7 @@ static const char *amdgpu_fence_get_timeline_name(struct 
dma_fence *f)
 
 static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
 {
-       struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
+       struct amdgpu_job *job = container_of(f, struct amdgpu_job, 
hw_fence.base);
 
        return (const char *)to_amdgpu_ring(job->base.sched)->name;
 }
@@ -830,7 +814,7 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence 
*f)
  */
 static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
 {
-       struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
+       struct amdgpu_job *job = container_of(f, struct amdgpu_job, 
hw_fence.base);
 
        if 
(!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
                amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));
@@ -869,7 +853,7 @@ static void amdgpu_job_fence_free(struct rcu_head *rcu)
        struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
 
        /* free job if fence has a parent job */
-       kfree(container_of(f, struct amdgpu_job, hw_fence));
+       kfree(container_of(f, struct amdgpu_job, hw_fence.base));
 }
 
 /**
diff --git sys/dev/pci/drm/amd/amdgpu/amdgpu_job.c 
sys/dev/pci/drm/amd/amdgpu/amdgpu_job.c
index 1ce20a19be8..7e6057a6e7f 100644
--- sys/dev/pci/drm/amd/amdgpu/amdgpu_job.c
+++ sys/dev/pci/drm/amd/amdgpu/amdgpu_job.c
@@ -259,8 +259,8 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
        /* Check if any fences where initialized */
        if (job->base.s_fence && job->base.s_fence->finished.ops)
                f = &job->base.s_fence->finished;
-       else if (job->hw_fence.ops)
-               f = &job->hw_fence;
+       else if (job->hw_fence.base.ops)
+               f = &job->hw_fence.base;
        else
                f = NULL;
 
@@ -277,10 +277,10 @@ static void amdgpu_job_free_cb(struct drm_sched_job 
*s_job)
        amdgpu_sync_free(&job->explicit_sync);
 
        /* only put the hw fence if has embedded fence */
-       if (!job->hw_fence.ops)
+       if (!job->hw_fence.base.ops)
                kfree(job);
        else
-               dma_fence_put(&job->hw_fence);
+               dma_fence_put(&job->hw_fence.base);
 }
 
 void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
@@ -309,10 +309,10 @@ void amdgpu_job_free(struct amdgpu_job *job)
        if (job->gang_submit != &job->base.s_fence->scheduled)
                dma_fence_put(job->gang_submit);
 
-       if (!job->hw_fence.ops)
+       if (!job->hw_fence.base.ops)
                kfree(job);
        else
-               dma_fence_put(&job->hw_fence);
+               dma_fence_put(&job->hw_fence.base);
 }
 
 struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
diff --git sys/dev/pci/drm/amd/amdgpu/amdgpu_job.h 
sys/dev/pci/drm/amd/amdgpu/amdgpu_job.h
index ce6b9ba967f..4fe033d8f35 100644
--- sys/dev/pci/drm/amd/amdgpu/amdgpu_job.h
+++ sys/dev/pci/drm/amd/amdgpu/amdgpu_job.h
@@ -48,7 +48,7 @@ struct amdgpu_job {
        struct drm_sched_job    base;
        struct amdgpu_vm        *vm;
        struct amdgpu_sync      explicit_sync;
-       struct dma_fence        hw_fence;
+       struct amdgpu_fence     hw_fence;
        struct dma_fence        *gang_submit;
        uint32_t                preamble_status;
        uint32_t                preemption_status;
diff --git sys/dev/pci/drm/amd/amdgpu/amdgpu_ring.h 
sys/dev/pci/drm/amd/amdgpu/amdgpu_ring.h
index 93e3419f036..25a74726ea2 100644
--- sys/dev/pci/drm/amd/amdgpu/amdgpu_ring.h
+++ sys/dev/pci/drm/amd/amdgpu/amdgpu_ring.h
@@ -126,6 +126,22 @@ struct amdgpu_fence_driver {
        struct dma_fence                **fences;
 };
 
+/*
+ * Fences mark an event in the GPUs pipeline and are used
+ * for GPU/CPU synchronization.  When the fence is written,
+ * it is expected that all buffers associated with that fence
+ * are no longer in use by the associated ring on the GPU and
+ * that the relevant GPU caches have been flushed.
+ */
+
+struct amdgpu_fence {
+       struct dma_fence base;
+
+       /* RB, DMA, etc. */
+       struct amdgpu_ring              *ring;
+       ktime_t                         start_timestamp;
+};
+
 extern const struct drm_sched_backend_ops amdgpu_sched_ops;
 
 void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
diff --git sys/dev/pci/drm/amd/amdgpu/amdgpu_seq64.c 
sys/dev/pci/drm/amd/amdgpu/amdgpu_seq64.c
index e22cb2b5cd9..dba8051b8c1 100644
--- sys/dev/pci/drm/amd/amdgpu/amdgpu_seq64.c
+++ sys/dev/pci/drm/amd/amdgpu/amdgpu_seq64.c
@@ -133,7 +133,7 @@ void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct 
amdgpu_fpriv *fpriv)
 
        vm = &fpriv->vm;
 
-       drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+       drm_exec_init(&exec, 0, 0);
        drm_exec_until_all_locked(&exec) {
                r = amdgpu_vm_lock_pd(vm, &exec, 0);
                if (likely(!r))
diff --git sys/dev/pci/drm/amd/amdgpu/amdgpu_ucode.c 
sys/dev/pci/drm/amd/amdgpu/amdgpu_ucode.c
index a77b0b02f52..84f817794ba 100644
--- sys/dev/pci/drm/amd/amdgpu/amdgpu_ucode.c
+++ sys/dev/pci/drm/amd/amdgpu/amdgpu_ucode.c
@@ -30,6 +30,10 @@
 
 #define AMDGPU_UCODE_NAME_MAX          (128)
 
+static const struct kicker_device kicker_device_list[] = {
+       {0x744B, 0x00},
+};
+
 static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header 
*hdr)
 {
        DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes));
@@ -1387,6 +1391,19 @@ static const char *amdgpu_ucode_legacy_naming(struct 
amdgpu_device *adev, int bl
        return NULL;
 }
 
+bool amdgpu_is_kicker_fw(struct amdgpu_device *adev)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(kicker_device_list); i++) {
+               if (adev->pdev->device == kicker_device_list[i].device &&
+                       adev->pdev->revision == kicker_device_list[i].revision)
+               return true;
+       }
+
+       return false;
+}
+
 void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int 
block_type, char *ucode_prefix, int len)
 {
        int maj, min, rev;
diff --git sys/dev/pci/drm/amd/amdgpu/amdgpu_ucode.h 
sys/dev/pci/drm/amd/amdgpu/amdgpu_ucode.h
index 6532dd34faa..15e547f8818 100644
--- sys/dev/pci/drm/amd/amdgpu/amdgpu_ucode.h
+++ sys/dev/pci/drm/amd/amdgpu/amdgpu_ucode.h
@@ -595,6 +595,11 @@ struct amdgpu_firmware {
        uint64_t fw_buf_mc;
 };
 
+struct kicker_device{
+       unsigned short device;
+       u8 revision;
+};
+
 void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr);
 void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr);
 void amdgpu_ucode_print_imu_hdr(const struct common_firmware_header *hdr);
@@ -622,5 +627,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int 
load_type);
 const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id);
 
 void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int 
block_type, char *ucode_prefix, int len);
+bool amdgpu_is_kicker_fw(struct amdgpu_device *adev);
 
 #endif
diff --git sys/dev/pci/drm/amd/amdgpu/amdgpu_vram_mgr.c 
sys/dev/pci/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 5ec6e958205..14deeb41819 100644
--- sys/dev/pci/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ sys/dev/pci/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -467,7 +467,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager 
*man,
        int r;
 
        lpfn = (u64)place->lpfn << PAGE_SHIFT;
-       if (!lpfn)
+       if (!lpfn || lpfn > man->size)
                lpfn = man->size;
 
        fpfn = (u64)place->fpfn << PAGE_SHIFT;
diff --git sys/dev/pci/drm/amd/amdgpu/sdma_v4_4_2.c 
sys/dev/pci/drm/amd/amdgpu/sdma_v4_4_2.c
index 7fd3e59526e..9cb1211da9c 100644
--- sys/dev/pci/drm/amd/amdgpu/sdma_v4_4_2.c
+++ sys/dev/pci/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -485,7 +485,7 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device 
*adev,
 {
        struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
        u32 doorbell_offset, doorbell;
-       u32 rb_cntl, ib_cntl;
+       u32 rb_cntl, ib_cntl, sdma_cntl;
        int i;
 
        for_each_inst(i, inst_mask) {
@@ -497,6 +497,9 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device 
*adev,
                ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL);
                ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 
0);
                WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);
+               sdma_cntl = RREG32_SDMA(i, regSDMA_CNTL);
+               sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, UTC_L1_ENABLE, 
0);
+               WREG32_SDMA(i, regSDMA_CNTL, sdma_cntl);
 
                if (sdma[i]->use_doorbell) {
                        doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
@@ -953,6 +956,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device 
*adev,
                /* set utc l1 enable flag always to 1 */
                temp = RREG32_SDMA(i, regSDMA_CNTL);
                temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1);
+               WREG32_SDMA(i, regSDMA_CNTL, temp);
 
                if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 
5)) {
                        /* enable context empty interrupt during initialization 
*/
diff --git sys/dev/pci/drm/amd/amdkfd/cwsr_trap_handler.h 
sys/dev/pci/drm/amd/amdkfd/cwsr_trap_handler.h
index 7062f12b5b7..6c8c9935a0f 100644
--- sys/dev/pci/drm/amd/amdkfd/cwsr_trap_handler.h
+++ sys/dev/pci/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -3640,7 +3640,7 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
 };
 
 static const uint32_t cwsr_trap_gfx12_hex[] = {
-       0xbfa00001, 0xbfa0024b,
+       0xbfa00001, 0xbfa002a2,
        0xb0804009, 0xb8f8f804,
        0x9178ff78, 0x00008c00,
        0xb8fbf811, 0x8b6eff78,
@@ -3714,7 +3714,15 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
        0x00011677, 0xd7610000,
        0x00011a79, 0xd7610000,
        0x00011c7e, 0xd7610000,
-       0x00011e7f, 0xbefe00ff,
+       0x00011e7f, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xbefe00ff,
        0x00003fff, 0xbeff0080,
        0xee0a407a, 0x000c0000,
        0x00004000, 0xd760007a,
@@ -3751,38 +3759,46 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
        0x00000200, 0xbef600ff,
        0x01000000, 0x7e000280,
        0x7e020280, 0x7e040280,
-       0xbefd0080, 0xbe804ec2,
-       0xbf94fffe, 0xb8faf804,
-       0x8b7a847a, 0x91788478,
-       0x8c787a78, 0xd7610002,
-       0x0000fa71, 0x807d817d,
-       0xd7610002, 0x0000fa6c,
-       0x807d817d, 0x917aff6d,
-       0x80000000, 0xd7610002,
-       0x0000fa7a, 0x807d817d,
-       0xd7610002, 0x0000fa6e,
-       0x807d817d, 0xd7610002,
-       0x0000fa6f, 0x807d817d,
-       0xd7610002, 0x0000fa78,
-       0x807d817d, 0xb8faf811,
-       0xd7610002, 0x0000fa7a,
-       0x807d817d, 0xd7610002,
-       0x0000fa7b, 0x807d817d,
-       0xb8f1f801, 0xd7610002,
-       0x0000fa71, 0x807d817d,
-       0xb8f1f814, 0xd7610002,
-       0x0000fa71, 0x807d817d,
-       0xb8f1f815, 0xd7610002,
-       0x0000fa71, 0x807d817d,
-       0xb8f1f812, 0xd7610002,
-       0x0000fa71, 0x807d817d,
-       0xb8f1f813, 0xd7610002,
-       0x0000fa71, 0x807d817d,
+       0xbe804ec2, 0xbf94fffe,
+       0xb8faf804, 0x8b7a847a,
+       0x91788478, 0x8c787a78,
+       0x917aff6d, 0x80000000,
+       0xd7610002, 0x00010071,
+       0xd7610002, 0x0001026c,
+       0xd7610002, 0x0001047a,
+       0xd7610002, 0x0001066e,
+       0xd7610002, 0x0001086f,
+       0xd7610002, 0x00010a78,
+       0xd7610002, 0x00010e7b,
+       0xd8500000, 0x00000000,
+       0xd8500000, 0x00000000,
+       0xd8500000, 0x00000000,
+       0xd8500000, 0x00000000,
+       0xd8500000, 0x00000000,
+       0xd8500000, 0x00000000,
+       0xd8500000, 0x00000000,
+       0xd8500000, 0x00000000,
+       0xb8faf811, 0xd7610002,
+       0x00010c7a, 0xb8faf801,
+       0xd7610002, 0x0001107a,
+       0xb8faf814, 0xd7610002,
+       0x0001127a, 0xb8faf815,
+       0xd7610002, 0x0001147a,
+       0xb8faf812, 0xd7610002,
+       0x0001167a, 0xb8faf813,
+       0xd7610002, 0x0001187a,
        0xb8faf802, 0xd7610002,
-       0x0000fa7a, 0x807d817d,
-       0xbefa50c1, 0xbfc70000,
-       0xd7610002, 0x0000fa7a,
-       0x807d817d, 0xbefe00ff,
+       0x00011a7a, 0xbefa50c1,
+       0xbfc70000, 0xd7610002,
+       0x00011c7a, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xbefe00ff,
        0x0000ffff, 0xbeff0080,
        0xc4068070, 0x008ce802,
        0x00000000, 0xbefe00c1,
@@ -3797,329 +3813,356 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
        0xbe824102, 0xbe844104,
        0xbe864106, 0xbe884108,
        0xbe8a410a, 0xbe8c410c,
-       0xbe8e410e, 0xd7610002,
-       0x0000f200, 0x80798179,
-       0xd7610002, 0x0000f201,
-       0x80798179, 0xd7610002,
-       0x0000f202, 0x80798179,
-       0xd7610002, 0x0000f203,
-       0x80798179, 0xd7610002,
-       0x0000f204, 0x80798179,
-       0xd7610002, 0x0000f205,
-       0x80798179, 0xd7610002,
-       0x0000f206, 0x80798179,
-       0xd7610002, 0x0000f207,
-       0x80798179, 0xd7610002,
-       0x0000f208, 0x80798179,
-       0xd7610002, 0x0000f209,
-       0x80798179, 0xd7610002,
-       0x0000f20a, 0x80798179,
-       0xd7610002, 0x0000f20b,
-       0x80798179, 0xd7610002,
-       0x0000f20c, 0x80798179,
-       0xd7610002, 0x0000f20d,
-       0x80798179, 0xd7610002,
-       0x0000f20e, 0x80798179,
-       0xd7610002, 0x0000f20f,
-       0x80798179, 0xbf06a079,
-       0xbfa10007, 0xc4068070,
+       0xbe8e410e, 0xbf068079,
+       0xbfa10032, 0xd7610002,
+       0x00010000, 0xd7610002,
+       0x00010201, 0xd7610002,
+       0x00010402, 0xd7610002,
+       0x00010603, 0xd7610002,
+       0x00010804, 0xd7610002,
+       0x00010a05, 0xd7610002,
+       0x00010c06, 0xd7610002,
+       0x00010e07, 0xd7610002,
+       0x00011008, 0xd7610002,
+       0x00011209, 0xd7610002,
+       0x0001140a, 0xd7610002,
+       0x0001160b, 0xd7610002,
+       0x0001180c, 0xd7610002,
+       0x00011a0d, 0xd7610002,
+       0x00011c0e, 0xd7610002,
+       0x00011e0f, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0x80799079,
+       0xbfa00038, 0xd7610002,
+       0x00012000, 0xd7610002,
+       0x00012201, 0xd7610002,
+       0x00012402, 0xd7610002,
+       0x00012603, 0xd7610002,
+       0x00012804, 0xd7610002,
+       0x00012a05, 0xd7610002,
+       0x00012c06, 0xd7610002,
+       0x00012e07, 0xd7610002,
+       0x00013008, 0xd7610002,
+       0x00013209, 0xd7610002,
+       0x0001340a, 0xd7610002,
+       0x0001360b, 0xd7610002,
+       0x0001380c, 0xd7610002,
+       0x00013a0d, 0xd7610002,
+       0x00013c0e, 0xd7610002,
+       0x00013e0f, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0x80799079,
+       0xc4068070, 0x008ce802,
+       0x00000000, 0x8070ff70,
+       0x00000080, 0xbef90080,
+       0x7e040280, 0x807d907d,
+       0xbf0aff7d, 0x00000060,
+       0xbfa2ff88, 0xbe804100,
+       0xbe824102, 0xbe844104,
+       0xbe864106, 0xbe884108,
+       0xbe8a410a, 0xd7610002,
+       0x00010000, 0xd7610002,
+       0x00010201, 0xd7610002,
+       0x00010402, 0xd7610002,
+       0x00010603, 0xd7610002,
+       0x00010804, 0xd7610002,
+       0x00010a05, 0xd7610002,
+       0x00010c06, 0xd7610002,
+       0x00010e07, 0xd7610002,
+       0x00011008, 0xd7610002,
+       0x00011209, 0xd7610002,
+       0x0001140a, 0xd7610002,
+       0x0001160b, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xd8500000,
+       0x00000000, 0xc4068070,
        0x008ce802, 0x00000000,
+       0xbefe00c1, 0x857d9973,
+       0x8b7d817d, 0xbf06817d,
+       0xbfa20002, 0xbeff0080,
+       0xbfa00001, 0xbeff00c1,
+       0xb8fb4306, 0x8b7bc17b,
+       0xbfa10044, 0x8b7aff6d,
+       0x80000000, 0xbfa10041,
+       0x847b897b, 0xbef6007b,
+       0xb8f03b05, 0x80708170,
+       0xbf0d9973, 0xbfa20002,
+       0x84708970, 0xbfa00001,
+       0x84708a70, 0xb8fa1e06,
+       0x847a8a7a, 0x80707a70,
+       0x8070ff70, 0x00000200,
        0x8070ff70, 0x00000080,
-       0xbef90080, 0x7e040280,
-       0x807d907d, 0xbf0aff7d,
-       0x00000060, 0xbfa2ffbb,
-       0xbe804100, 0xbe824102,
-       0xbe844104, 0xbe864106,
-       0xbe884108, 0xbe8a410a,
-       0xd7610002, 0x0000f200,
-       0x80798179, 0xd7610002,
-       0x0000f201, 0x80798179,
-       0xd7610002, 0x0000f202,
-       0x80798179, 0xd7610002,
-       0x0000f203, 0x80798179,
-       0xd7610002, 0x0000f204,
-       0x80798179, 0xd7610002,
-       0x0000f205, 0x80798179,
-       0xd7610002, 0x0000f206,
-       0x80798179, 0xd7610002,
-       0x0000f207, 0x80798179,
-       0xd7610002, 0x0000f208,
-       0x80798179, 0xd7610002,
-       0x0000f209, 0x80798179,
-       0xd7610002, 0x0000f20a,
-       0x80798179, 0xd7610002,
-       0x0000f20b, 0x80798179,
-       0xc4068070, 0x008ce802,
-       0x00000000, 0xbefe00c1,
-       0x857d9973, 0x8b7d817d,
-       0xbf06817d, 0xbfa20002,
-       0xbeff0080, 0xbfa00001,
-       0xbeff00c1, 0xb8fb4306,
-       0x8b7bc17b, 0xbfa10044,
-       0x8b7aff6d, 0x80000000,
-       0xbfa10041, 0x847b897b,
-       0xbef6007b, 0xb8f03b05,
-       0x80708170, 0xbf0d9973,
-       0xbfa20002, 0x84708970,
-       0xbfa00001, 0x84708a70,
-       0xb8fa1e06, 0x847a8a7a,
-       0x80707a70, 0x8070ff70,
-       0x00000200, 0x8070ff70,
-       0x00000080, 0xbef600ff,
-       0x01000000, 0xd71f0000,
-       0x000100c1, 0xd7200000,
-       0x000200c1, 0x16000084,
-       0x857d9973, 0x8b7d817d,
-       0xbf06817d, 0xbefd0080,
-       0xbfa20013, 0xbe8300ff,
-       0x00000080, 0xbf800000,
-       0xbf800000, 0xbf800000,
-       0xd8d80000, 0x01000000,
-       0xbf8a0000, 0xc4068070,
-       0x008ce801, 0x00000000,
-       0x807d037d, 0x80700370,
-       0xd5250000, 0x0001ff00,
-       0x00000080, 0xbf0a7b7d,
-       0xbfa2fff3, 0xbfa00012,
-       0xbe8300ff, 0x00000100,
+       0xbef600ff, 0x01000000,
+       0xd71f0000, 0x000100c1,
+       0xd7200000, 0x000200c1,
+       0x16000084, 0x857d9973,
+       0x8b7d817d, 0xbf06817d,
+       0xbefd0080, 0xbfa20013,
+       0xbe8300ff, 0x00000080,
        0xbf800000, 0xbf800000,
        0xbf800000, 0xd8d80000,
        0x01000000, 0xbf8a0000,
        0xc4068070, 0x008ce801,
        0x00000000, 0x807d037d,
        0x80700370, 0xd5250000,
-       0x0001ff00, 0x00000100,
+       0x0001ff00, 0x00000080,
        0xbf0a7b7d, 0xbfa2fff3,
-       0xbefe00c1, 0x857d9973,
-       0x8b7d817d, 0xbf06817d,
-       0xbfa20004, 0xbef000ff,
-       0x00000200, 0xbeff0080,
-       0xbfa00003, 0xbef000ff,
-       0x00000400, 0xbeff00c1,
-       0xb8fb3b05, 0x807b817b,
-       0x847b827b, 0x857d9973,
-       0x8b7d817d, 0xbf06817d,
-       0xbfa2001b, 0xbef600ff,
-       0x01000000, 0xbefd0084,
-       0xbf0a7b7d, 0xbfa10040,
-       0x7e008700, 0x7e028701,
-       0x7e048702, 0x7e068703,
-       0xc4068070, 0x008ce800,
-       0x00000000, 0xc4068070,
-       0x008ce801, 0x00008000,
-       0xc4068070, 0x008ce802,
-       0x00010000, 0xc4068070,
-       0x008ce803, 0x00018000,
-       0x807d847d, 0x8070ff70,
-       0x00000200, 0xbf0a7b7d,
-       0xbfa2ffeb, 0xbfa0002a,
+       0xbfa00012, 0xbe8300ff,
+       0x00000100, 0xbf800000,
+       0xbf800000, 0xbf800000,
+       0xd8d80000, 0x01000000,
+       0xbf8a0000, 0xc4068070,
+       0x008ce801, 0x00000000,
+       0x807d037d, 0x80700370,
+       0xd5250000, 0x0001ff00,
+       0x00000100, 0xbf0a7b7d,
+       0xbfa2fff3, 0xbefe00c1,
+       0x857d9973, 0x8b7d817d,
+       0xbf06817d, 0xbfa20004,
+       0xbef000ff, 0x00000200,
+       0xbeff0080, 0xbfa00003,
+       0xbef000ff, 0x00000400,
+       0xbeff00c1, 0xb8fb3b05,
+       0x807b817b, 0x847b827b,
+       0x857d9973, 0x8b7d817d,
+       0xbf06817d, 0xbfa2001b,
        0xbef600ff, 0x01000000,
        0xbefd0084, 0xbf0a7b7d,
-       0xbfa10015, 0x7e008700,
+       0xbfa10040, 0x7e008700,
        0x7e028701, 0x7e048702,
        0x7e068703, 0xc4068070,
        0x008ce800, 0x00000000,
        0xc4068070, 0x008ce801,
-       0x00010000, 0xc4068070,
-       0x008ce802, 0x00020000,
+       0x00008000, 0xc4068070,
+       0x008ce802, 0x00010000,
        0xc4068070, 0x008ce803,
-       0x00030000, 0x807d847d,
-       0x8070ff70, 0x00000400,
+       0x00018000, 0x807d847d,
+       0x8070ff70, 0x00000200,
        0xbf0a7b7d, 0xbfa2ffeb,
-       0xb8fb1e06, 0x8b7bc17b,
-       0xbfa1000d, 0x847b837b,
-       0x807b7d7b, 0xbefe00c1,
-       0xbeff0080, 0x7e008700,
+       0xbfa0002a, 0xbef600ff,
+       0x01000000, 0xbefd0084,
+       0xbf0a7b7d, 0xbfa10015,
+       0x7e008700, 0x7e028701,
+       0x7e048702, 0x7e068703,
        0xc4068070, 0x008ce800,
-       0x00000000, 0x807d817d,
-       0x8070ff70, 0x00000080,
-       0xbf0a7b7d, 0xbfa2fff7,
-       0xbfa0016e, 0xbef4007e,
-       0x8b75ff7f, 0x0000ffff,
-       0x8c75ff75, 0x00040000,
-       0xbef60080, 0xbef700ff,
-       0x10807fac, 0xbef1007f,
-       0xb8f20742, 0x84729972,
-       0x8b6eff7f, 0x04000000,
-       0xbfa1003b, 0xbefe00c1,
-       0x857d9972, 0x8b7d817d,
-       0xbf06817d, 0xbfa20002,
-       0xbeff0080, 0xbfa00001,
-       0xbeff00c1, 0xb8ef4306,
-       0x8b6fc16f, 0xbfa10030,
-       0x846f896f, 0xbef6006f,
+       0x00000000, 0xc4068070,
+       0x008ce801, 0x00010000,
+       0xc4068070, 0x008ce802,
+       0x00020000, 0xc4068070,
+       0x008ce803, 0x00030000,
+       0x807d847d, 0x8070ff70,
+       0x00000400, 0xbf0a7b7d,
+       0xbfa2ffeb, 0xb8fb1e06,
+       0x8b7bc17b, 0xbfa1000d,
+       0x847b837b, 0x807b7d7b,
+       0xbefe00c1, 0xbeff0080,
+       0x7e008700, 0xc4068070,
+       0x008ce800, 0x00000000,
+       0x807d817d, 0x8070ff70,
+       0x00000080, 0xbf0a7b7d,
+       0xbfa2fff7, 0xbfa0016e,
+       0xbef4007e, 0x8b75ff7f,
+       0x0000ffff, 0x8c75ff75,
+       0x00040000, 0xbef60080,
+       0xbef700ff, 0x10807fac,
+       0xbef1007f, 0xb8f20742,
+       0x84729972, 0x8b6eff7f,
+       0x04000000, 0xbfa1003b,
+       0xbefe00c1, 0x857d9972,
+       0x8b7d817d, 0xbf06817d,
+       0xbfa20002, 0xbeff0080,
+       0xbfa00001, 0xbeff00c1,
+       0xb8ef4306, 0x8b6fc16f,
+       0xbfa10030, 0x846f896f,
+       0xbef6006f, 0xb8f83b05,
+       0x80788178, 0xbf0d9972,
+       0xbfa20002, 0x84788978,
+       0xbfa00001, 0x84788a78,
+       0xb8ee1e06, 0x846e8a6e,
+       0x80786e78, 0x8078ff78,
+       0x00000200, 0x8078ff78,
+       0x00000080, 0xbef600ff,
+       0x01000000, 0x857d9972,
+       0x8b7d817d, 0xbf06817d,
+       0xbefd0080, 0xbfa2000d,
+       0xc4050078, 0x0080e800,
+       0x00000000, 0xbf8a0000,
+       0xdac00000, 0x00000000,
+       0x807dff7d, 0x00000080,
+       0x8078ff78, 0x00000080,
+       0xbf0a6f7d, 0xbfa2fff4,
+       0xbfa0000c, 0xc4050078,
+       0x0080e800, 0x00000000,
+       0xbf8a0000, 0xdac00000,
+       0x00000000, 0x807dff7d,
+       0x00000100, 0x8078ff78,
+       0x00000100, 0xbf0a6f7d,
+       0xbfa2fff4, 0xbef80080,
+       0xbefe00c1, 0x857d9972,
+       0x8b7d817d, 0xbf06817d,
+       0xbfa20002, 0xbeff0080,
+       0xbfa00001, 0xbeff00c1,
+       0xb8ef3b05, 0x806f816f,
+       0x846f826f, 0x857d9972,
+       0x8b7d817d, 0xbf06817d,
+       0xbfa2002c, 0xbef600ff,
+       0x01000000, 0xbeee0078,
+       0x8078ff78, 0x00000200,
+       0xbefd0084, 0xbf0a6f7d,
+       0xbfa10061, 0xc4050078,
+       0x008ce800, 0x00000000,
+       0xc4050078, 0x008ce801,
+       0x00008000, 0xc4050078,
+       0x008ce802, 0x00010000,
+       0xc4050078, 0x008ce803,
+       0x00018000, 0xbf8a0000,
+       0x7e008500, 0x7e028501,
+       0x7e048502, 0x7e068503,
+       0x807d847d, 0x8078ff78,
+       0x00000200, 0xbf0a6f7d,
+       0xbfa2ffea, 0xc405006e,
+       0x008ce800, 0x00000000,
+       0xc405006e, 0x008ce801,
+       0x00008000, 0xc405006e,
+       0x008ce802, 0x00010000,
+       0xc405006e, 0x008ce803,
+       0x00018000, 0xbf8a0000,
+       0xbfa0003d, 0xbef600ff,
+       0x01000000, 0xbeee0078,
+       0x8078ff78, 0x00000400,
+       0xbefd0084, 0xbf0a6f7d,
+       0xbfa10016, 0xc4050078,
+       0x008ce800, 0x00000000,
+       0xc4050078, 0x008ce801,
+       0x00010000, 0xc4050078,
+       0x008ce802, 0x00020000,
+       0xc4050078, 0x008ce803,
+       0x00030000, 0xbf8a0000,
+       0x7e008500, 0x7e028501,
+       0x7e048502, 0x7e068503,
+       0x807d847d, 0x8078ff78,
+       0x00000400, 0xbf0a6f7d,
+       0xbfa2ffea, 0xb8ef1e06,
+       0x8b6fc16f, 0xbfa1000f,
+       0x846f836f, 0x806f7d6f,
+       0xbefe00c1, 0xbeff0080,
+       0xc4050078, 0x008ce800,
+       0x00000000, 0xbf8a0000,
+       0x7e008500, 0x807d817d,
+       0x8078ff78, 0x00000080,
+       0xbf0a6f7d, 0xbfa2fff6,
+       0xbeff00c1, 0xc405006e,
+       0x008ce800, 0x00000000,
+       0xc405006e, 0x008ce801,
+       0x00010000, 0xc405006e,
+       0x008ce802, 0x00020000,
+       0xc405006e, 0x008ce803,
+       0x00030000, 0xbf8a0000,
        0xb8f83b05, 0x80788178,
        0xbf0d9972, 0xbfa20002,
        0x84788978, 0xbfa00001,
        0x84788a78, 0xb8ee1e06,
        0x846e8a6e, 0x80786e78,
        0x8078ff78, 0x00000200,
-       0x8078ff78, 0x00000080,
-       0xbef600ff, 0x01000000,
-       0x857d9972, 0x8b7d817d,
-       0xbf06817d, 0xbefd0080,
-       0xbfa2000d, 0xc4050078,
-       0x0080e800, 0x00000000,
-       0xbf8a0000, 0xdac00000,
-       0x00000000, 0x807dff7d,
-       0x00000080, 0x8078ff78,
-       0x00000080, 0xbf0a6f7d,
-       0xbfa2fff4, 0xbfa0000c,
-       0xc4050078, 0x0080e800,
-       0x00000000, 0xbf8a0000,
-       0xdac00000, 0x00000000,
-       0x807dff7d, 0x00000100,
-       0x8078ff78, 0x00000100,
-       0xbf0a6f7d, 0xbfa2fff4,
-       0xbef80080, 0xbefe00c1,
-       0x857d9972, 0x8b7d817d,
-       0xbf06817d, 0xbfa20002,
-       0xbeff0080, 0xbfa00001,
-       0xbeff00c1, 0xb8ef3b05,
-       0x806f816f, 0x846f826f,
-       0x857d9972, 0x8b7d817d,
-       0xbf06817d, 0xbfa2002c,
+       0x80f8ff78, 0x00000050,
        0xbef600ff, 0x01000000,
-       0xbeee0078, 0x8078ff78,
-       0x00000200, 0xbefd0084,
-       0xbf0a6f7d, 0xbfa10061,
-       0xc4050078, 0x008ce800,
-       0x00000000, 0xc4050078,
-       0x008ce801, 0x00008000,
-       0xc4050078, 0x008ce802,
-       0x00010000, 0xc4050078,
-       0x008ce803, 0x00018000,
-       0xbf8a0000, 0x7e008500,
-       0x7e028501, 0x7e048502,
-       0x7e068503, 0x807d847d,
+       0xbefd00ff, 0x0000006c,
+       0x80f89078, 0xf462403a,
+       0xf0000000, 0xbf8a0000,
+       0x80fd847d, 0xbf800000,
+       0xbe804300, 0xbe824302,
+       0x80f8a078, 0xf462603a,
+       0xf0000000, 0xbf8a0000,
+       0x80fd887d, 0xbf800000,
+       0xbe804300, 0xbe824302,
+       0xbe844304, 0xbe864306,
+       0x80f8c078, 0xf462803a,
+       0xf0000000, 0xbf8a0000,
+       0x80fd907d, 0xbf800000,
+       0xbe804300, 0xbe824302,
+       0xbe844304, 0xbe864306,
+       0xbe884308, 0xbe8a430a,
+       0xbe8c430c, 0xbe8e430e,
+       0xbf06807d, 0xbfa1fff0,
+       0xb980f801, 0x00000000,
+       0xb8f83b05, 0x80788178,
+       0xbf0d9972, 0xbfa20002,
+       0x84788978, 0xbfa00001,
+       0x84788a78, 0xb8ee1e06,
+       0x846e8a6e, 0x80786e78,
        0x8078ff78, 0x00000200,
-       0xbf0a6f7d, 0xbfa2ffea,
-       0xc405006e, 0x008ce800,
-       0x00000000, 0xc405006e,
-       0x008ce801, 0x00008000,
-       0xc405006e, 0x008ce802,
-       0x00010000, 0xc405006e,
-       0x008ce803, 0x00018000,
-       0xbf8a0000, 0xbfa0003d,
        0xbef600ff, 0x01000000,
-       0xbeee0078, 0x8078ff78,
-       0x00000400, 0xbefd0084,
-       0xbf0a6f7d, 0xbfa10016,
-       0xc4050078, 0x008ce800,
-       0x00000000, 0xc4050078,
-       0x008ce801, 0x00010000,
-       0xc4050078, 0x008ce802,
-       0x00020000, 0xc4050078,
-       0x008ce803, 0x00030000,
-       0xbf8a0000, 0x7e008500,
-       0x7e028501, 0x7e048502,
-       0x7e068503, 0x807d847d,
-       0x8078ff78, 0x00000400,
-       0xbf0a6f7d, 0xbfa2ffea,
-       0xb8ef1e06, 0x8b6fc16f,
-       0xbfa1000f, 0x846f836f,
-       0x806f7d6f, 0xbefe00c1,
-       0xbeff0080, 0xc4050078,
-       0x008ce800, 0x00000000,
-       0xbf8a0000, 0x7e008500,
-       0x807d817d, 0x8078ff78,
-       0x00000080, 0xbf0a6f7d,
-       0xbfa2fff6, 0xbeff00c1,
-       0xc405006e, 0x008ce800,
-       0x00000000, 0xc405006e,
-       0x008ce801, 0x00010000,
-       0xc405006e, 0x008ce802,
-       0x00020000, 0xc405006e,
-       0x008ce803, 0x00030000,
-       0xbf8a0000, 0xb8f83b05,
-       0x80788178, 0xbf0d9972,
-       0xbfa20002, 0x84788978,
-       0xbfa00001, 0x84788a78,
-       0xb8ee1e06, 0x846e8a6e,
-       0x80786e78, 0x8078ff78,
-       0x00000200, 0x80f8ff78,
-       0x00000050, 0xbef600ff,
-       0x01000000, 0xbefd00ff,
-       0x0000006c, 0x80f89078,
-       0xf462403a, 0xf0000000,
-       0xbf8a0000, 0x80fd847d,
-       0xbf800000, 0xbe804300,
-       0xbe824302, 0x80f8a078,
-       0xf462603a, 0xf0000000,
-       0xbf8a0000, 0x80fd887d,
-       0xbf800000, 0xbe804300,
-       0xbe824302, 0xbe844304,
-       0xbe864306, 0x80f8c078,
-       0xf462803a, 0xf0000000,
-       0xbf8a0000, 0x80fd907d,
-       0xbf800000, 0xbe804300,
-       0xbe824302, 0xbe844304,
-       0xbe864306, 0xbe884308,
-       0xbe8a430a, 0xbe8c430c,
-       0xbe8e430e, 0xbf06807d,
-       0xbfa1fff0, 0xb980f801,
-       0x00000000, 0xb8f83b05,
-       0x80788178, 0xbf0d9972,
-       0xbfa20002, 0x84788978,
-       0xbfa00001, 0x84788a78,
-       0xb8ee1e06, 0x846e8a6e,
-       0x80786e78, 0x8078ff78,
-       0x00000200, 0xbef600ff,
-       0x01000000, 0xbeff0071,
-       0xf4621bfa, 0xf0000000,
-       0x80788478, 0xf4621b3a,
+       0xbeff0071, 0xf4621bfa,
        0xf0000000, 0x80788478,
-       0xf4621b7a, 0xf0000000,
-       0x80788478, 0xf4621c3a,
+       0xf4621b3a, 0xf0000000,
+       0x80788478, 0xf4621b7a,
        0xf0000000, 0x80788478,
-       0xf4621c7a, 0xf0000000,
-       0x80788478, 0xf4621eba,
+       0xf4621c3a, 0xf0000000,
+       0x80788478, 0xf4621c7a,
        0xf0000000, 0x80788478,
-       0xf4621efa, 0xf0000000,
-       0x80788478, 0xf4621e7a,
+       0xf4621eba, 0xf0000000,
+       0x80788478, 0xf4621efa,
        0xf0000000, 0x80788478,
-       0xf4621cfa, 0xf0000000,
-       0x80788478, 0xf4621bba,
+       0xf4621e7a, 0xf0000000,
+       0x80788478, 0xf4621cfa,
        0xf0000000, 0x80788478,
-       0xbf8a0000, 0xb96ef814,
        0xf4621bba, 0xf0000000,
        0x80788478, 0xbf8a0000,
-       0xb96ef815, 0xf4621bba,
+       0xb96ef814, 0xf4621bba,
        0xf0000000, 0x80788478,
-       0xbf8a0000, 0xb96ef812,
+       0xbf8a0000, 0xb96ef815,
        0xf4621bba, 0xf0000000,
        0x80788478, 0xbf8a0000,
-       0xb96ef813, 0x8b6eff7f,
-       0x04000000, 0xbfa1000d,
-       0x80788478, 0xf4621bba,
+       0xb96ef812, 0xf4621bba,
        0xf0000000, 0x80788478,
-       0xbf8a0000, 0xbf0d806e,
-       0xbfa10006, 0x856e906e,
-       0x8b6e6e6e, 0xbfa10003,
-       0xbe804ec1, 0x816ec16e,
-       0xbfa0fffb, 0xbefd006f,
-       0xbefe0070, 0xbeff0071,
-       0xb97b2011, 0x857b867b,
-       0xb97b0191, 0x857b827b,
-       0xb97bba11, 0xb973f801,
-       0xb8ee3b05, 0x806e816e,
-       0xbf0d9972, 0xbfa20002,
-       0x846e896e, 0xbfa00001,
-       0x846e8a6e, 0xb8ef1e06,
-       0x846f8a6f, 0x806e6f6e,
-       0x806eff6e, 0x00000200,
-       0x806e746e, 0x826f8075,
-       0x8b6fff6f, 0x0000ffff,
-       0xf4605c37, 0xf8000050,
-       0xf4605d37, 0xf8000060,
-       0xf4601e77, 0xf8000074,
-       0xbf8a0000, 0x8b6dff6d,
-       0x0000ffff, 0x8bfe7e7e,
-       0x8bea6a6a, 0xb97af804,
+       0xbf8a0000, 0xb96ef813,
+       0x8b6eff7f, 0x04000000,
+       0xbfa1000d, 0x80788478,
+       0xf4621bba, 0xf0000000,
+       0x80788478, 0xbf8a0000,
+       0xbf0d806e, 0xbfa10006,
+       0x856e906e, 0x8b6e6e6e,
+       0xbfa10003, 0xbe804ec1,
+       0x816ec16e, 0xbfa0fffb,
+       0xbefd006f, 0xbefe0070,
+       0xbeff0071, 0xb97b2011,
+       0x857b867b, 0xb97b0191,
+       0x857b827b, 0xb97bba11,
+       0xb973f801, 0xb8ee3b05,
+       0x806e816e, 0xbf0d9972,
+       0xbfa20002, 0x846e896e,
+       0xbfa00001, 0x846e8a6e,
+       0xb8ef1e06, 0x846f8a6f,
+       0x806e6f6e, 0x806eff6e,
+       0x00000200, 0x806e746e,
+       0x826f8075, 0x8b6fff6f,
+       0x0000ffff, 0xf4605c37,
+       0xf8000050, 0xf4605d37,
+       0xf8000060, 0xf4601e77,
+       0xf8000074, 0xbf8a0000,
+       0x8b6dff6d, 0x0000ffff,
+       0x8bfe7e7e, 0x8bea6a6a,
+       0xb97af804, 0xbe804ec2,
+       0xbf94fffe, 0xbe804a6c,
        0xbe804ec2, 0xbf94fffe,
-       0xbe804a6c, 0xbe804ec2,
-       0xbf94fffe, 0xbfb10000,
+       0xbfb10000, 0xbf9f0000,
        0xbf9f0000, 0xbf9f0000,
        0xbf9f0000, 0xbf9f0000,
-       0xbf9f0000, 0x00000000,
 };
diff --git sys/dev/pci/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm sys/dev/pci/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
index 7b9d36e5fa4..5a1a1b1f897 100644
--- sys/dev/pci/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
+++ sys/dev/pci/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
@@ -30,6 +30,7 @@
 #define CHIP_GFX12 37
 
 #define SINGLE_STEP_MISSED_WORKAROUND 1        //workaround for lost 
TRAP_AFTER_INST exception when SAVECTX raised
+#define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12)
 
 var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK   = 0x4
 var SQ_WAVE_STATE_PRIV_SCC_SHIFT               = 9
@@ -351,6 +352,7 @@ L_HAVE_VGPRS:
        v_writelane_b32 v0, ttmp13, 0xD
        v_writelane_b32 v0, exec_lo, 0xE
        v_writelane_b32 v0, exec_hi, 0xF
+       valu_sgpr_hazard()
 
        s_mov_b32       exec_lo, 0x3FFF
        s_mov_b32       exec_hi, 0x0
@@ -417,7 +419,6 @@ L_SAVE_HWREG:
        v_mov_b32       v0, 0x0                                                 
//Offset[31:0] from buffer resource
        v_mov_b32       v1, 0x0                                                 
//Offset[63:32] from buffer resource
        v_mov_b32       v2, 0x0                                                 
//Set of SGPRs for TCP store
-       s_mov_b32       m0, 0x0                                                 
//Next lane of v2 to write to
 
        // Ensure no further changes to barrier or LDS state.
        // STATE_PRIV.BARRIER_COMPLETE may change up to this point.
@@ -430,40 +431,41 @@ L_SAVE_HWREG:
        s_andn2_b32     s_save_state_priv, s_save_state_priv, 
SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
        s_or_b32        s_save_state_priv, s_save_state_priv, s_save_tmp
 
-       write_hwreg_to_v2(s_save_m0)
-       write_hwreg_to_v2(s_save_pc_lo)
        s_andn2_b32     s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
-       write_hwreg_to_v2(s_save_tmp)
-       write_hwreg_to_v2(s_save_exec_lo)
-       write_hwreg_to_v2(s_save_exec_hi)
-       write_hwreg_to_v2(s_save_state_priv)
+       v_writelane_b32 v2, s_save_m0, 0x0
+       v_writelane_b32 v2, s_save_pc_lo, 0x1
+       v_writelane_b32 v2, s_save_tmp, 0x2
+       v_writelane_b32 v2, s_save_exec_lo, 0x3
+       v_writelane_b32 v2, s_save_exec_hi, 0x4
+       v_writelane_b32 v2, s_save_state_priv, 0x5
+       v_writelane_b32 v2, s_save_xnack_mask, 0x7
+       valu_sgpr_hazard()
 
        s_getreg_b32    s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
-       write_hwreg_to_v2(s_save_tmp)
+       v_writelane_b32 v2, s_save_tmp, 0x6
 
-       write_hwreg_to_v2(s_save_xnack_mask)
+       s_getreg_b32    s_save_tmp, hwreg(HW_REG_WAVE_MODE)
+       v_writelane_b32 v2, s_save_tmp, 0x8
 
-       s_getreg_b32    s_save_m0, hwreg(HW_REG_WAVE_MODE)
-       write_hwreg_to_v2(s_save_m0)
+       s_getreg_b32    s_save_tmp, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO)
+       v_writelane_b32 v2, s_save_tmp, 0x9
 
-       s_getreg_b32    s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO)
-       write_hwreg_to_v2(s_save_m0)
+       s_getreg_b32    s_save_tmp, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI)
+       v_writelane_b32 v2, s_save_tmp, 0xA
 
-       s_getreg_b32    s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI)
-       write_hwreg_to_v2(s_save_m0)
+       s_getreg_b32    s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
+       v_writelane_b32 v2, s_save_tmp, 0xB
 
-       s_getreg_b32    s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
-       write_hwreg_to_v2(s_save_m0)
-
-       s_getreg_b32    s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL)
-       write_hwreg_to_v2(s_save_m0)
+       s_getreg_b32    s_save_tmp, hwreg(HW_REG_WAVE_TRAP_CTRL)
+       v_writelane_b32 v2, s_save_tmp, 0xC
 
        s_getreg_b32    s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
-       write_hwreg_to_v2(s_save_tmp)
+       v_writelane_b32 v2, s_save_tmp, 0xD
 
        s_get_barrier_state s_save_tmp, -1
        s_wait_kmcnt (0)
-       write_hwreg_to_v2(s_save_tmp)
+       v_writelane_b32 v2, s_save_tmp, 0xE
+       valu_sgpr_hazard()
 
        // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
        s_mov_b32       exec_lo, 0xFFFF
@@ -497,10 +499,12 @@ L_SAVE_SGPR_LOOP:
        s_movrels_b64   s12, s12                                                
//s12 = s[12+m0], s13 = s[13+m0]
        s_movrels_b64   s14, s14                                                
//s14 = s[14+m0], s15 = s[15+m0]
 
-       write_16sgpr_to_v2(s0)
-
-       s_cmp_eq_u32    ttmp13, 0x20                                            
//have 32 VGPR lanes filled?
-       s_cbranch_scc0  L_SAVE_SGPR_SKIP_TCP_STORE
+       s_cmp_eq_u32    ttmp13, 0x0
+       s_cbranch_scc0  L_WRITE_V2_SECOND_HALF
+       write_16sgpr_to_v2(s0, 0x0)
+       s_branch        L_SAVE_SGPR_SKIP_TCP_STORE
+L_WRITE_V2_SECOND_HALF:
+       write_16sgpr_to_v2(s0, 0x10)
 
        buffer_store_dword      v2, v0, s_save_buf_rsrc0, s_save_mem_offset 
scope:SCOPE_SYS
        s_add_u32       s_save_mem_offset, s_save_mem_offset, 0x80
@@ -1056,27 +1060,21 @@ L_END_PGM:
        s_endpgm_saved
 end
 
-function write_hwreg_to_v2(s)
-       // Copy into VGPR for later TCP store.
-       v_writelane_b32 v2, s, m0
-       s_add_u32       m0, m0, 0x1
-end
-
-
-function write_16sgpr_to_v2(s)
+function write_16sgpr_to_v2(s, lane_offset)
        // Copy into VGPR for later TCP store.
        for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++
-               v_writelane_b32 v2, s[sgpr_idx], ttmp13
-               s_add_u32       ttmp13, ttmp13, 0x1
+               v_writelane_b32 v2, s[sgpr_idx], sgpr_idx + lane_offset
        end
+       valu_sgpr_hazard()
+       s_add_u32       ttmp13, ttmp13, 0x10
 end
 
 function write_12sgpr_to_v2(s)
        // Copy into VGPR for later TCP store.
        for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++
-               v_writelane_b32 v2, s[sgpr_idx], ttmp13
-               s_add_u32       ttmp13, ttmp13, 0x1
+               v_writelane_b32 v2, s[sgpr_idx], sgpr_idx
        end
+       valu_sgpr_hazard()
 end
 
 function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
@@ -1128,3 +1126,11 @@ function get_wave_size2(s_reg)
        s_getreg_b32    s_reg, 
hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE)
        s_lshl_b32      s_reg, s_reg, S_WAVE_SIZE
 end
+
+function valu_sgpr_hazard
+#if HAVE_VALU_SGPR_HAZARD
+       for var rep = 0; rep < 8; rep ++
+               ds_nop // body of the 8-iteration for; the function is empty when HAVE_VALU_SGPR_HAZARD is 0
+       end
+#endif
+end
diff --git sys/dev/pci/drm/amd/amdkfd/kfd_device.c sys/dev/pci/drm/amd/amdkfd/kfd_device.c
index 9186ef0bd2a..07eadab4c1c 100644
--- sys/dev/pci/drm/amd/amdkfd/kfd_device.c
+++ sys/dev/pci/drm/amd/amdkfd/kfd_device.c
@@ -537,7 +537,8 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
                        kfd->cwsr_isa = cwsr_trap_gfx11_hex;
                        kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
                } else {
-                       BUILD_BUG_ON(sizeof(cwsr_trap_gfx12_hex) > PAGE_SIZE);
+                       BUILD_BUG_ON(sizeof(cwsr_trap_gfx12_hex)
+                                            > KFD_CWSR_TMA_OFFSET);
                        kfd->cwsr_isa = cwsr_trap_gfx12_hex;
                        kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx12_hex);
                }
diff --git sys/dev/pci/drm/amd/amdkfd/kfd_events.c sys/dev/pci/drm/amd/amdkfd/kfd_events.c
index ea379224920..6798510c4a7 100644
--- sys/dev/pci/drm/amd/amdkfd/kfd_events.c
+++ sys/dev/pci/drm/amd/amdkfd/kfd_events.c
@@ -1315,6 +1315,7 @@ void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
        user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
        if (unlikely(user_gpu_id == -EINVAL)) {
                WARN_ONCE(1, "Could not get user_gpu_id from dev->id:%x\n", 
dev->id);
+               kfd_unref_process(p);
                return;
        }
 
diff --git sys/dev/pci/drm/amd/amdkfd/kfd_packet_manager_v9.c sys/dev/pci/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 1f9f5bfeaf8..d87b895660c 100644
--- sys/dev/pci/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ sys/dev/pci/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -237,7 +237,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
 
        packet->bitfields2.engine_sel =
                engine_sel__mes_map_queues__compute_vi;
-       packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
+       packet->bitfields2.gws_control_queue = q->properties.is_gws ? 1 : 0;
        packet->bitfields2.extended_engine_sel =
                extended_engine_sel__mes_map_queues__legacy_engine_sel;
        packet->bitfields2.queue_type =
diff --git sys/dev/pci/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c sys/dev/pci/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
index 7e3d506bb79..f3aa93ddbf9 100644
--- sys/dev/pci/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
+++ sys/dev/pci/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
@@ -762,6 +762,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm
                plane->pixel_format = dml2_420_10;
                break;
        case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+       case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
        case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
        case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
                plane->pixel_format = dml2_444_64;
diff --git sys/dev/pci/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c sys/dev/pci/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
index 0090b7bc232..157903115f3 100644
--- sys/dev/pci/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
+++ sys/dev/pci/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c
@@ -4651,7 +4651,10 @@ static void calculate_tdlut_setting(
        //the tdlut is fetched during the 2 row times of prefetch.
        if (p->setup_for_tdlut) {
                *p->tdlut_groups_per_2row_ub = (unsigned 
int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 
1);
-               *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - 
p->cursor_buffer_size * 1024) / tdlut_drain_rate;
+               if (*p->tdlut_bytes_per_frame > p->cursor_buffer_size * 1024)
+                       *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - 
p->cursor_buffer_size * 1024) / tdlut_drain_rate;
+               else
+                       *p->tdlut_opt_time = 0;
                *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / 
tdlut_drain_rate;
        }
 
diff --git sys/dev/pci/drm/amd/display/dc/dml2/dml2_translation_helper.c sys/dev/pci/drm/amd/display/dc/dml2/dml2_translation_helper.c
index 92a3fff1e26..405aefd14d9 100644
--- sys/dev/pci/drm/amd/display/dc/dml2/dml2_translation_helper.c
+++ sys/dev/pci/drm/amd/display/dc/dml2/dml2_translation_helper.c
@@ -909,6 +909,7 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p
                out->SourcePixelFormat[location] = dml_420_10;
                break;
        case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+       case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616:
        case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
        case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
                out->SourcePixelFormat[location] = dml_444_64;
diff --git sys/dev/pci/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c sys/dev/pci/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
index 4243d419a50..d97df583159 100644
--- sys/dev/pci/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
+++ sys/dev/pci/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c
@@ -951,8 +951,8 @@ void dce110_edp_backlight_control(
        struct dc_context *ctx = link->ctx;
        struct bp_transmitter_control cntl = { 0 };
        uint8_t pwrseq_instance = 0;
-       unsigned int pre_T11_delay = OLED_PRE_T11_DELAY;
-       unsigned int post_T7_delay = OLED_POST_T7_DELAY;
+       unsigned int pre_T11_delay = (link->dpcd_sink_ext_caps.bits.oled ? 
OLED_PRE_T11_DELAY : 0);
+       unsigned int post_T7_delay = (link->dpcd_sink_ext_caps.bits.oled ? 
OLED_POST_T7_DELAY : 0);
 
        if (dal_graphics_object_id_get_connector_id(link->link_enc->connector)
                != CONNECTOR_ID_EDP) {
@@ -1067,7 +1067,8 @@ void dce110_edp_backlight_control(
        if (!enable) {
                /*follow oem panel config's requirement*/
                pre_T11_delay += link->panel_config.pps.extra_pre_t11_ms;
-               drm_msleep(pre_T11_delay);
+               if (pre_T11_delay)
+                       drm_msleep(pre_T11_delay);
        }
 }
 
@@ -1216,7 +1217,7 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx)
        struct dce_hwseq *hws = link->dc->hwseq;
 
        if (link->local_sink && link->local_sink->sink_signal == 
SIGNAL_TYPE_EDP) {
-               if (!link->skip_implict_edp_power_control)
+               if (!link->skip_implict_edp_power_control && hws)
                        hws->funcs.edp_backlight_control(link, false);
                link->dc->hwss.set_abm_immediate_disable(pipe_ctx);
        }
diff --git sys/dev/pci/drm/amd/display/modules/hdcp/hdcp_psp.c sys/dev/pci/drm/amd/display/modules/hdcp/hdcp_psp.c
index 8c137d7c032..e58e7b93810 100644
--- sys/dev/pci/drm/amd/display/modules/hdcp/hdcp_psp.c
+++ sys/dev/pci/drm/amd/display/modules/hdcp/hdcp_psp.c
@@ -368,6 +368,9 @@ enum mod_hdcp_status mod_hdcp_hdcp1_enable_encryption(struct mod_hdcp *hdcp)
        struct mod_hdcp_display *display = get_first_active_display(hdcp);
        enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS;
 
+       if (!display)
+               return MOD_HDCP_STATUS_DISPLAY_NOT_FOUND;
+
        mutex_lock(&psp->hdcp_context.mutex);
        hdcp_cmd = (struct ta_hdcp_shared_memory 
*)psp->hdcp_context.context.mem_context.shared_buf;
        memset(hdcp_cmd, 0, sizeof(struct ta_hdcp_shared_memory));
diff --git sys/dev/pci/drm/drm_fbdev_dma.c sys/dev/pci/drm/drm_fbdev_dma.c
index 37de807aeb4..ed3e618762f 100644
--- sys/dev/pci/drm/drm_fbdev_dma.c
+++ sys/dev/pci/drm/drm_fbdev_dma.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: MIT
 
 #include <linux/fb.h>
+#include <linux/vmalloc.h>
 
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_drv.h>
@@ -80,36 +81,42 @@ static const struct fb_ops drm_fbdev_dma_fb_ops = {
 #endif
 };
 
-FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS(drm_fbdev_dma,
+#ifdef __linux__
+FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS(drm_fbdev_dma_shadowed,
                                   drm_fb_helper_damage_range,
                                   drm_fb_helper_damage_area);
 
-#ifdef __linux__
-static int drm_fbdev_dma_deferred_fb_mmap(struct fb_info *info, struct 
vm_area_struct *vma)
+static void drm_fbdev_dma_shadowed_fb_destroy(struct fb_info *info)
 {
        struct drm_fb_helper *fb_helper = info->par;
-       struct drm_framebuffer *fb = fb_helper->fb;
-       struct drm_gem_dma_object *dma = drm_fb_dma_get_gem_obj(fb, 0);
+       void *shadow = info->screen_buffer;
 
-       if (!dma->map_noncoherent)
-               vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+       if (!fb_helper->dev)
+               return;
 
-       return fb_deferred_io_mmap(info, vma);
+       if (info->fbdefio)
+               fb_deferred_io_cleanup(info);
+       drm_fb_helper_fini(fb_helper);
+       vfree(shadow);
+
+       drm_client_buffer_vunmap(fb_helper->buffer);
+       drm_client_framebuffer_delete(fb_helper->buffer);
+       drm_client_release(&fb_helper->client);
+       drm_fb_helper_unprepare(fb_helper);
+       kfree(fb_helper);
 }
-#endif
+#endif /* __linux__ */
 
-static const struct fb_ops drm_fbdev_dma_deferred_fb_ops = {
-#ifdef __linux__
+static const struct fb_ops drm_fbdev_dma_shadowed_fb_ops = {
+#ifdef notyet
        .owner = THIS_MODULE,
        .fb_open = drm_fbdev_dma_fb_open,
        .fb_release = drm_fbdev_dma_fb_release,
-       __FB_DEFAULT_DEFERRED_OPS_RDWR(drm_fbdev_dma),
-#endif
+       FB_DEFAULT_DEFERRED_OPS(drm_fbdev_dma_shadowed),
+       DRM_FB_HELPER_DEFAULT_OPS,
+       .fb_destroy = drm_fbdev_dma_shadowed_fb_destroy,
+#else
        DRM_FB_HELPER_DEFAULT_OPS,
-#ifdef notyet
-       __FB_DEFAULT_DEFERRED_OPS_DRAW(drm_fbdev_dma),
-       .fb_mmap = drm_fbdev_dma_deferred_fb_mmap,
-       .fb_destroy = drm_fbdev_dma_fb_destroy,
 #endif
 };
 
@@ -117,12 +124,71 @@ static const struct fb_ops drm_fbdev_dma_deferred_fb_ops = {
  * struct drm_fb_helper
  */
 
+static void drm_fbdev_dma_damage_blit_real(struct drm_fb_helper *fb_helper,
+                                          struct drm_clip_rect *clip,
+                                          struct iosys_map *dst)
+{ /* Copy rows y1..y2-1 of @clip from info->screen_buffer to @dst, one iosys_map_memcpy_to() per row. */
+       struct drm_framebuffer *fb = fb_helper->fb;
+       size_t offset = clip->y1 * fb->pitches[0]; /* start of row y1; pitches[0] is the per-row step used in the loop below */
+       size_t len = clip->x2 - clip->x1; /* clip width in pixels; turned into a byte count by the switch */
+       unsigned int y;
+       void *src;
+
+       switch (drm_format_info_bpp(fb->format, 0)) { /* 1, 2 and 4 are treated as sub-byte pixels: 8, 4 and 2 per byte */
+       case 1:
+               offset += clip->x1 / 8; /* byte that holds pixel x1 */
+               len = DIV_ROUND_UP(len + clip->x1 % 8, 8); /* add the pixels skipped inside that first byte, round up to whole bytes */
+               break;
+       case 2:
+               offset += clip->x1 / 4;
+               len = DIV_ROUND_UP(len + clip->x1 % 4, 4);
+               break;
+       case 4:
+               offset += clip->x1 / 2;
+               len = DIV_ROUND_UP(len + clip->x1 % 2, 2);
+               break;
+       default:
+               offset += clip->x1 * fb->format->cpp[0]; /* every other size: cpp[0] is used as the per-pixel multiplier */
+               len *= fb->format->cpp[0];
+               break;
+       }
+
+       src = fb_helper->info->screen_buffer + offset;
+       iosys_map_incr(dst, offset); /* go to first pixel within clip rect */
+
+       for (y = clip->y1; y < clip->y2; y++) { /* y2 is exclusive */
+               iosys_map_memcpy_to(dst, 0, src, len);
+               iosys_map_incr(dst, fb->pitches[0]); /* step destination and source by the same row pitch */
+               src += fb->pitches[0];
+       }
+}
+
+static int drm_fbdev_dma_damage_blit(struct drm_fb_helper *fb_helper,
+                                    struct drm_clip_rect *clip)
+{ /* Run drm_fbdev_dma_damage_blit_real() on @clip with fb_helper->lock held; always returns 0. */
+       struct drm_client_buffer *buffer = fb_helper->buffer;
+       struct iosys_map dst;
+
+       /*
+        * For fbdev emulation, we only have to protect against fbdev modeset
+        * operations. Nothing else will involve the client buffer's BO. So it
+        * is sufficient to acquire struct drm_fb_helper.lock here.
+        */
+       mutex_lock(&fb_helper->lock);
+
+       dst = buffer->map; /* struct copy: the helper receives &dst, not &buffer->map */
+       drm_fbdev_dma_damage_blit_real(fb_helper, clip, &dst);
+
+       mutex_unlock(&fb_helper->lock);
+
+       return 0; /* this function has no failure path */
+}
+
 static int drm_fbdev_dma_helper_fb_probe(struct drm_fb_helper *fb_helper,
                                         struct drm_fb_helper_surface_size 
*sizes)
 {
        return drm_fbdev_dma_driver_fbdev_probe(fb_helper, sizes);
 }
-
 static int drm_fbdev_dma_helper_fb_dirty(struct drm_fb_helper *helper,
                                         struct drm_clip_rect *clip)
 {
@@ -134,6 +200,10 @@ static int drm_fbdev_dma_helper_fb_dirty(struct drm_fb_helper *helper,
                return 0;
 
        if (helper->fb->funcs->dirty) {
+               ret = drm_fbdev_dma_damage_blit(helper, clip);
+               if (drm_WARN_ONCE(dev, ret, "Damage blitter failed: ret=%d\n", 
ret))
+                       return ret;
+
                ret = helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, clip, 1);
                if (drm_WARN_ONCE(dev, ret, "Dirty helper failed: ret=%d\n", 
ret))
                        return ret;
@@ -151,14 +221,84 @@ static const struct drm_fb_helper_funcs drm_fbdev_dma_helper_funcs = {
  * struct drm_fb_helper
  */
 
+static int drm_fbdev_dma_driver_fbdev_probe_tail(struct drm_fb_helper *fb_helper,
+                                                struct drm_fb_helper_surface_size *sizes)
+{
+       struct drm_device *dev = fb_helper->dev;
+       struct drm_client_buffer *buffer = fb_helper->buffer;
+       struct drm_gem_dma_object *dma_obj = to_drm_gem_dma_obj(buffer->gem);
+       struct drm_framebuffer *fb = fb_helper->fb;
+       struct fb_info *info = fb_helper->info;
+       struct iosys_map map = buffer->map;
+
+       info->fbops = &drm_fbdev_dma_fb_ops;
+
+       /* screen */
+       info->flags |= FBINFO_VIRTFB; /* system memory */
+       if (dma_obj->map_noncoherent)
+               info->flags |= FBINFO_READS_FAST; /* signal caching */
+       info->screen_size = sizes->surface_height * fb->pitches[0];
+       info->screen_buffer = map.vaddr;
+       if (!(info->flags & FBINFO_HIDE_SMEM_START)) {
+               if (!drm_WARN_ON(dev, is_vmalloc_addr(info->screen_buffer)))
+                       info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer));
+       }
+       info->fix.smem_len = info->screen_size;
+
+       return 0;
+}
+
+static int drm_fbdev_dma_driver_fbdev_probe_tail_shadowed(struct drm_fb_helper *fb_helper,
+                                                         struct drm_fb_helper_surface_size *sizes)
+{
+       struct drm_client_buffer *buffer = fb_helper->buffer;
+       struct fb_info *info = fb_helper->info;
+       size_t screen_size = buffer->gem->size;
+       void *screen_buffer;
+       int ret;
+
+       /*
+        * Deferred I/O requires struct page for framebuffer memory,
+        * which is not guaranteed for all DMA ranges. We thus create
+        * a shadow buffer in system memory.
+        */
+       screen_buffer = vzalloc(screen_size);
+       if (!screen_buffer)
+               return -ENOMEM;
+
+       info->fbops = &drm_fbdev_dma_shadowed_fb_ops;
+
+       /* screen */
+       info->flags |= FBINFO_VIRTFB; /* system memory */
+       info->flags |= FBINFO_READS_FAST; /* signal caching */
+       info->screen_buffer = screen_buffer;
+       info->fix.smem_len = screen_size;
+
+#ifdef notyet
+       fb_helper->fbdefio.delay = HZ / 20;
+       fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io;
+
+       info->fbdefio = &fb_helper->fbdefio;
+       ret = fb_deferred_io_init(info);
+       if (ret)
+               goto err_vfree;
+
+       return 0;
+
+err_vfree:
+       vfree(screen_buffer);
+       return ret;
+#else
+       return 0;
+#endif
+}
+
 int drm_fbdev_dma_driver_fbdev_probe(struct drm_fb_helper *fb_helper,
                                     struct drm_fb_helper_surface_size *sizes)
 {
        struct drm_client_dev *client = &fb_helper->client;
        struct drm_device *dev = fb_helper->dev;
-       bool use_deferred_io = false;
        struct drm_client_buffer *buffer;
-       struct drm_gem_dma_object *dma_obj;
        struct drm_framebuffer *fb;
        struct fb_info *info;
        u32 format;
@@ -175,19 +315,9 @@ int drm_fbdev_dma_driver_fbdev_probe(struct drm_fb_helper *fb_helper,
                                               sizes->surface_height, format);
        if (IS_ERR(buffer))
                return PTR_ERR(buffer);
-       dma_obj = to_drm_gem_dma_obj(buffer->gem);
 
        fb = buffer->fb;
 
-       /*
-        * Deferred I/O requires struct page for framebuffer memory,
-        * which is not guaranteed for all DMA ranges. We thus only
-        * install deferred I/O if we have a framebuffer that requires
-        * it.
-        */
-       if (fb->funcs->dirty)
-               use_deferred_io = true;
-
        ret = drm_client_buffer_vmap(buffer, &map);
        if (ret) {
                goto err_drm_client_buffer_delete;
@@ -208,54 +338,17 @@ int drm_fbdev_dma_driver_fbdev_probe(struct drm_fb_helper *fb_helper,
 
        drm_fb_helper_fill_info(info, fb_helper, sizes);
 
-       if (use_deferred_io)
-               info->fbops = &drm_fbdev_dma_deferred_fb_ops;
+       if (fb->funcs->dirty)
+               ret = drm_fbdev_dma_driver_fbdev_probe_tail_shadowed(fb_helper, sizes);
        else
-               info->fbops = &drm_fbdev_dma_fb_ops;
-
-       /* screen */
-       info->flags |= FBINFO_VIRTFB; /* system memory */
-       if (dma_obj->map_noncoherent)
-               info->flags |= FBINFO_READS_FAST; /* signal caching */
-       info->screen_size = sizes->surface_height * fb->pitches[0];
-       info->screen_buffer = map.vaddr;
-       if (!(info->flags & FBINFO_HIDE_SMEM_START)) {
-               if (!drm_WARN_ON(dev, is_vmalloc_addr(info->screen_buffer)))
-                       info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer));
-       }
-       info->fix.smem_len = info->screen_size;
-
-       /*
-        * Only set up deferred I/O if the screen buffer supports
-        * it. If this disagrees with the previous test for ->dirty,
-        * mmap on the /dev/fb file might not work correctly.
-        */
-       if (!is_vmalloc_addr(info->screen_buffer) && info->fix.smem_start) {
-               unsigned long pfn = info->fix.smem_start >> PAGE_SHIFT;
-
-               if (drm_WARN_ON(dev, !pfn_to_page(pfn)))
-                       use_deferred_io = false;
-       }
-
-#ifdef notyet
-       /* deferred I/O */
-       if (use_deferred_io) {
-               fb_helper->fbdefio.delay = HZ / 20;
-               fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io;
-
-               info->fbdefio = &fb_helper->fbdefio;
-               ret = fb_deferred_io_init(info);
-               if (ret)
-                       goto err_drm_fb_helper_release_info;
-       }
-#endif
+               ret = drm_fbdev_dma_driver_fbdev_probe_tail(fb_helper, sizes);
+       if (ret)
+               goto err_drm_fb_helper_release_info;
 
        return 0;
 
-#ifdef notyet
 err_drm_fb_helper_release_info:
        drm_fb_helper_release_info(fb_helper);
-#endif
 err_drm_client_buffer_vunmap:
        fb_helper->fb = NULL;
        fb_helper->buffer = NULL;
diff --git sys/dev/pci/drm/i915/display/vlv_dsi.c sys/dev/pci/drm/i915/display/vlv_dsi.c
index b98b5d7b673..e4b42a04b62 100644
--- sys/dev/pci/drm/i915/display/vlv_dsi.c
+++ sys/dev/pci/drm/i915/display/vlv_dsi.c
@@ -1059,7 +1059,7 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder,
                                              BXT_MIPI_TRANS_VACTIVE(port));
        adjusted_mode->crtc_vtotal =
                                intel_de_read(display,
-                                             BXT_MIPI_TRANS_VTOTAL(port));
+                                             BXT_MIPI_TRANS_VTOTAL(port)) + 1;
 
        hactive = adjusted_mode->crtc_hdisplay;
        hfp = intel_de_read(display, MIPI_HFP_COUNT(display, port));
@@ -1264,7 +1264,7 @@ static void set_dsi_timings(struct intel_encoder *encoder,
                        intel_de_write(display, BXT_MIPI_TRANS_VACTIVE(port),
                                       adjusted_mode->crtc_vdisplay);
                        intel_de_write(display, BXT_MIPI_TRANS_VTOTAL(port),
-                                      adjusted_mode->crtc_vtotal);
+                                      adjusted_mode->crtc_vtotal - 1);
                }
 
                intel_de_write(display, MIPI_HACTIVE_AREA_COUNT(display, port),
diff --git sys/dev/pci/drm/i915/i915_pmu.c sys/dev/pci/drm/i915/i915_pmu.c
index 05dd4771e33..3ff3078fa87 100644
--- sys/dev/pci/drm/i915/i915_pmu.c
+++ sys/dev/pci/drm/i915/i915_pmu.c
@@ -111,7 +111,7 @@ static unsigned int config_bit(const u64 config)
                return other_bit(config);
 }
 
-static u32 config_mask(const u64 config)
+static __always_inline u32 config_mask(const u64 config)
 {
        unsigned int bit = config_bit(config);
 
diff --git sys/dev/pci/drm/scheduler/sched_entity.c sys/dev/pci/drm/scheduler/sched_entity.c
index 9ee73b5bc69..55ae6e563f3 100644
--- sys/dev/pci/drm/scheduler/sched_entity.c
+++ sys/dev/pci/drm/scheduler/sched_entity.c
@@ -189,6 +189,7 @@ static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk)
 {
        struct drm_sched_job *job = container_of(wrk, typeof(*job), work);
 
+       drm_sched_fence_scheduled(job->s_fence, NULL);
        drm_sched_fence_finished(job->s_fence, -ESRCH);
        WARN_ON(job->s_fence->parent);
        job->sched->ops->free_job(job);

Reply via email to