On Mon, Dec 04, 2023 at 06:33:01PM +0100, Boris Brezillon wrote:
> Contains everything that's FW related, that includes the code dealing
> with the microcontroller unit (MCU) that's running the FW, and anything
> related to allocating memory shared between the FW and the CPU.
> 
> A few global FW events are processed in the IRQ handler, the rest is
> forwarded to the scheduler, since scheduling is the primary reason for
> the FW's existence, and also the main source of FW <-> kernel
> interactions.
> 
> v3:
> - Make the FW path more future-proof (Liviu)
> - Use one waitqueue for all FW events
> - Simplify propagation of FW events to the scheduler logic
> - Drop the panthor_fw_mem abstraction and use panthor_kernel_bo instead
> - Account for the panthor_vm changes
> - Replace the magic number 0x7fffffff with ~0 to better signify that
>   it's the maximum permitted value.
> - More accurate rounding when computing the firmware timeout.
> - Add a 'sub iterator' helper function. This also adds a check that a
>   firmware entry doesn't overflow the firmware image.
> - Drop __packed from FW structures, natural alignment is good enough.
> - Other minor code improvements.
> 
> Signed-off-by: Boris Brezillon <boris.brezil...@collabora.com>
> Signed-off-by: Steven Price <steven.pr...@arm.com>

Hi Boris,

While looking at Chris' comments, I have discovered another issue.

> ---
>  drivers/gpu/drm/panthor/panthor_fw.c | 1332 ++++++++++++++++++++++++++
>  drivers/gpu/drm/panthor/panthor_fw.h |  504 ++++++++++
>  2 files changed, 1836 insertions(+)
>  create mode 100644 drivers/gpu/drm/panthor/panthor_fw.c
>  create mode 100644 drivers/gpu/drm/panthor/panthor_fw.h
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c
> new file mode 100644
> index 000000000000..85afe769f567
> --- /dev/null
> +++ b/drivers/gpu/drm/panthor/panthor_fw.c

<snip>

> +static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
> +                                      const struct firmware *fw,
> +                                      struct panthor_fw_binary_iter *iter,
> +                                      u32 ehdr)
> +{
> +     struct panthor_fw_binary_section_entry_hdr hdr;
> +     struct panthor_fw_section *section;
> +     u32 section_size;
> +     u32 name_len;
> +     int ret;
> +
> +     ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
> +     if (ret)
> +             return ret;
> +
> +     if (hdr.data.end < hdr.data.start) {
> +             drm_err(&ptdev->base, "Firmware corrupted, data.end < 
> data.start (0x%x < 0x%x)\n",
> +                     hdr.data.end, hdr.data.start);
> +             return -EINVAL;
> +     }
> +
> +     if (hdr.va.end < hdr.va.start) {
> +             drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < 
> hdr.va.start (0x%x < 0x%x)\n",
> +                     hdr.va.end, hdr.va.start);
> +             return -EINVAL;
> +     }
> +
> +     if (hdr.data.end > fw->size) {
> +             drm_err(&ptdev->base, "Firmware corrupted, file truncated? 
> data_end=0x%x > fw size=0x%zx\n",
> +                     hdr.data.end, fw->size);
> +             return -EINVAL;
> +     }
> +
> +     if ((hdr.va.start & ~PAGE_MASK) != 0 ||
> +         (hdr.va.end & ~PAGE_MASK) != 0) {
> +             drm_err(&ptdev->base, "Firmware corrupted, virtual addresses 
> not page aligned: 0x%x-0x%x\n",
> +                     hdr.va.start, hdr.va.end);
> +             return -EINVAL;
> +     }
> +
> +     if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) {
> +             drm_err(&ptdev->base, "Firmware contains interface with 
> unsupported flags (0x%x)\n",
> +                     hdr.flags);
> +             return -EINVAL;
> +     }
> +
> +     if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) {
> +             drm_warn(&ptdev->base,
> +                      "Firmware protected mode entry not be supported, 
> ignoring");
> +             return 0;
> +     }
> +
> +     if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
> +         !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) {
> +             drm_err(&ptdev->base,
> +                     "Interface at 0x%llx must be shared", 
> CSF_MCU_SHARED_REGION_START);
> +             return -EINVAL;
> +     }
> +
> +     name_len = iter->size - iter->offset;
> +
> +     section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
> +     if (!section)
> +             return -ENOMEM;
> +
> +     list_add_tail(&section->node, &ptdev->fw->sections);
> +     section->flags = hdr.flags;
> +     section->data.size = hdr.data.end - hdr.data.start;
> +
> +     if (section->data.size > 0) {
> +             void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);
> +
> +             if (!data)
> +                     return -ENOMEM;
> +
> +             memcpy(data, fw->data + hdr.data.start, section->data.size);
> +             section->data.buf = data;
> +     }
> +
> +     if (name_len > 0) {
> +             char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);
> +
> +             if (!name)
> +                     return -ENOMEM;
> +
> +             memcpy(name, iter->data + iter->offset, name_len);
> +             name[name_len] = '\0';
> +             section->name = name;
> +     }
> +
> +     section_size = hdr.va.end - hdr.va.start;
> +     if (section_size) {
> +             u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK;
> +             struct panthor_gem_object *bo;
> +             u32 vm_map_flags = 0;
> +             struct sg_table *sgt;
> +             u64 va = hdr.va.start;
> +
> +             if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
> +                     vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;
> +
> +             if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX))
> +                     vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;
> +
> +             /* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to
> +              * non-cacheable for now. We might want to introduce a new
> +              * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
> +              * memory and is currently not used by our driver) for
> +              * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit
> +              * of IO-coherent systems.
> +              */
> +             if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED)
> +                     vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;
> +
> +             section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
> +                                                     section_size,
> +                                                     DRM_PANTHOR_BO_NO_MMAP,
> +                                                     vm_map_flags, va);
> +             if (IS_ERR(section->mem))
> +                     return PTR_ERR(section->mem);
> +
> +             if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
> +                     return -EINVAL;
> +
> +             if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) {
> +                     ret = panthor_kernel_bo_vmap(section->mem);
> +                     if (ret)
> +                             return ret;
> +             }
> +
> +             panthor_fw_init_section_mem(ptdev, section);
> +
> +             bo = to_panthor_bo(section->mem->obj);
> +             sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
> +             if (IS_ERR(sgt))
> +                     return PTR_ERR(section->mem);

I think we should return PTR_ERR(sgt) here.

In general I agree with Chris that the list_add_tail() call should be delayed
until all of the above allocations and preparations have succeeded.
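
Something along these lines at the tail of panthor_fw_load_section_entry()
is what I have in mind (untested sketch, reusing the names from this patch):

		bo = to_panthor_bo(section->mem->obj);
		sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
		if (IS_ERR(sgt))
			return PTR_ERR(sgt);

		dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
	}

	/* Publish the section only once nothing can fail anymore. The error
	 * paths above would then have to release section->mem themselves
	 * instead of relying on the list-based cleanup in panthor_fw_unplug().
	 */
	list_add_tail(&section->node, &ptdev->fw->sections);

	if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
		ptdev->fw->shared_section = section;

	return 0;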

Best regards,
Liviu

> +
> +             dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
> +     }
> +
> +     if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
> +             ptdev->fw->shared_section = section;
> +
> +     return 0;
> +}
> +
> +static void
> +panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
> +{
> +     struct panthor_fw_section *section;
> +
> +     list_for_each_entry(section, &ptdev->fw->sections, node) {
> +             struct sg_table *sgt;
> +
> +             if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
> +                     continue;
> +
> +             panthor_fw_init_section_mem(ptdev, section);
> +             sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base);
> +             if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt)))
> +                     dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
> +     }
> +}
> +
> +static int panthor_fw_load_entry(struct panthor_device *ptdev,
> +                              const struct firmware *fw,
> +                              struct panthor_fw_binary_iter *iter)
> +{
> +     struct panthor_fw_binary_iter eiter;
> +     u32 ehdr;
> +     int ret;
> +
> +     ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr));
> +     if (ret)
> +             return ret;
> +
> +     if ((iter->offset % sizeof(u32)) ||
> +         (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) {
> +             drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, 
> offset=0x%x size=0x%x\n",
> +                     (u32)(iter->offset - sizeof(u32)), 
> CSF_FW_BINARY_ENTRY_SIZE(ehdr));
> +             return -EINVAL;
> +     }
> +
> +     if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter,
> +                                         CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr)))
> +             return -EINVAL;
> +
> +     switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
> +     case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
> +             return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);
> +
> +     /* FIXME: handle those entry types? */
> +     case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
> +     case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST:
> +     case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER:
> +     case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA:
> +             return 0;
> +     default:
> +             break;
> +     }
> +
> +     if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL)
> +             return 0;
> +
> +     drm_err(&ptdev->base,
> +             "Unsupported non-optional entry type %u in firmware\n",
> +             CSF_FW_BINARY_ENTRY_TYPE(ehdr));
> +     return -EINVAL;
> +}
> +
> +static int panthor_fw_load(struct panthor_device *ptdev)
> +{
> +     const struct firmware *fw = NULL;
> +     struct panthor_fw_binary_iter iter = {};
> +     struct panthor_fw_binary_hdr hdr;
> +     char fw_path[128];
> +     int ret;
> +
> +     snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s",
> +              (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id),
> +              (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id),
> +              CSF_FW_NAME);
> +
> +     ret = request_firmware(&fw, fw_path, ptdev->base.dev);
> +     if (ret) {
> +             drm_err(&ptdev->base, "Failed to load firmware image '%s'\n",
> +                     CSF_FW_NAME);
> +             return ret;
> +     }
> +
> +     iter.data = fw->data;
> +     iter.size = fw->size;
> +     ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr));
> +     if (ret)
> +             goto out;
> +
> +     if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) {
> +             ret = -EINVAL;
> +             drm_err(&ptdev->base, "Invalid firmware magic\n");
> +             goto out;
> +     }
> +
> +     if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) {
> +             ret = -EINVAL;
> +             drm_err(&ptdev->base, "Unsupported firmware binary header 
> version %d.%d (expected %d.x)\n",
> +                     hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX);
> +             goto out;
> +     }
> +
> +     if (hdr.size > iter.size) {
> +             drm_err(&ptdev->base, "Firmware image is truncated\n");
> +             goto out;
> +     }
> +
> +     iter.size = hdr.size;
> +
> +     while (iter.offset < hdr.size) {
> +             ret = panthor_fw_load_entry(ptdev, fw, &iter);
> +             if (ret)
> +                     goto out;
> +     }
> +
> +     if (!ptdev->fw->shared_section) {
> +             drm_err(&ptdev->base, "Shared interface region not found\n");
> +             ret = -EINVAL;
> +             goto out;
> +     }
> +
> +out:
> +     release_firmware(fw);
> +     return ret;
> +}
> +
> +/**
> + * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address
> + * @ptdev: Device.
> + * @mcu_va: MCU address.
> + *
> + * Return: NULL if the address is not part of the shared section, non-NULL otherwise.
> + */
> +static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va)
> +{
> +     u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem);
> +     u64 shared_mem_end = shared_mem_start +
> +                          panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
> +     if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end)
> +             return NULL;
> +
> +     return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start);
> +}
> +
> +static int panthor_init_cs_iface(struct panthor_device *ptdev,
> +                              unsigned int csg_idx, unsigned int cs_idx)
> +{
> +     struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
> +     struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx);
> +     struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx];
> +     u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
> +     u32 iface_offset = CSF_GROUP_CONTROL_OFFSET +
> +                        (csg_idx * glb_iface->control->group_stride) +
> +                        CSF_STREAM_CONTROL_OFFSET +
> +                        (cs_idx * csg_iface->control->stream_stride);
> +     struct panthor_fw_cs_iface *first_cs_iface =
> +             panthor_fw_get_cs_iface(ptdev, 0, 0);
> +
> +     if (iface_offset + sizeof(*cs_iface) >= shared_section_sz)
> +             return -EINVAL;
> +
> +     spin_lock_init(&cs_iface->lock);
> +     cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
> +     cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va);
> +     cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va);
> +
> +     if (!cs_iface->input || !cs_iface->output) {
> +             drm_err(&ptdev->base, "Invalid stream control interface 
> input/output VA");
> +             return -EINVAL;
> +     }
> +
> +     if (cs_iface != first_cs_iface) {
> +             if (cs_iface->control->features != first_cs_iface->control->features) {
> +                     drm_err(&ptdev->base, "Expecting identical CS slots");
> +                     return -EINVAL;
> +             }
> +     } else {
> +             u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features);
> +
> +             ptdev->csif_info.cs_reg_count = reg_count;
> +             ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT;
> +     }
> +
> +     return 0;
> +}
> +
> +static bool compare_csg(const struct panthor_fw_csg_control_iface *a,
> +                     const struct panthor_fw_csg_control_iface *b)
> +{
> +     if (a->features != b->features)
> +             return false;
> +     if (a->suspend_size != b->suspend_size)
> +             return false;
> +     if (a->protm_suspend_size != b->protm_suspend_size)
> +             return false;
> +     if (a->stream_num != b->stream_num)
> +             return false;
> +     return true;
> +}
> +
> +static int panthor_init_csg_iface(struct panthor_device *ptdev,
> +                               unsigned int csg_idx)
> +{
> +     struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
> +     struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx];
> +     u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
> +     u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride);
> +     unsigned int i;
> +
> +     if (iface_offset + sizeof(*csg_iface) >= shared_section_sz)
> +             return -EINVAL;
> +
> +     spin_lock_init(&csg_iface->lock);
> +     csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
> +     csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va);
> +     csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va);
> +
> +     if (csg_iface->control->stream_num < MIN_CS_PER_CSG ||
> +         csg_iface->control->stream_num > MAX_CS_PER_CSG)
> +             return -EINVAL;
> +
> +     if (!csg_iface->input || !csg_iface->output) {
> +             drm_err(&ptdev->base, "Invalid group control interface 
> input/output VA");
> +             return -EINVAL;
> +     }
> +
> +     if (csg_idx > 0) {
> +             struct panthor_fw_csg_iface *first_csg_iface =
> +                     panthor_fw_get_csg_iface(ptdev, 0);
> +
> +             if (!compare_csg(first_csg_iface->control, csg_iface->control)) {
> +                     drm_err(&ptdev->base, "Expecting identical CSG slots");
> +                     return -EINVAL;
> +             }
> +     }
> +
> +     for (i = 0; i < csg_iface->control->stream_num; i++) {
> +             int ret = panthor_init_cs_iface(ptdev, csg_idx, i);
> +
> +             if (ret)
> +                     return ret;
> +     }
> +
> +     return 0;
> +}
> +
> +static u32 panthor_get_instr_features(struct panthor_device *ptdev)
> +{
> +     struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
> +
> +     if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0))
> +             return 0;
> +
> +     return glb_iface->control->instr_features;
> +}
> +
> +static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
> +{
> +     struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global;
> +     unsigned int i;
> +
> +     if (!ptdev->fw->shared_section->mem->kmap)
> +             return -EINVAL;
> +
> +     spin_lock_init(&glb_iface->lock);
> +     glb_iface->control = ptdev->fw->shared_section->mem->kmap;
> +
> +     if (!glb_iface->control->version) {
> +             drm_err(&ptdev->base, "Firmware version is 0. Firmware may have 
> failed to boot");
> +             return -EINVAL;
> +     }
> +
> +     glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va);
> +     glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va);
> +     if (!glb_iface->input || !glb_iface->output) {
> +             drm_err(&ptdev->base, "Invalid global control interface 
> input/output VA");
> +             return -EINVAL;
> +     }
> +
> +     if (glb_iface->control->group_num > MAX_CSGS ||
> +         glb_iface->control->group_num < MIN_CSGS) {
> +             drm_err(&ptdev->base, "Invalid number of control groups");
> +             return -EINVAL;
> +     }
> +
> +     for (i = 0; i < glb_iface->control->group_num; i++) {
> +             int ret = panthor_init_csg_iface(ptdev, i);
> +
> +             if (ret)
> +                     return ret;
> +     }
> +
> +     drm_info(&ptdev->base, "CSF FW v%d.%d.%d, Features %#x Instrumentation 
> features %#x",
> +              CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
> +              CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
> +              CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
> +              glb_iface->control->features,
> +              panthor_get_instr_features(ptdev));
> +     return 0;
> +}
> +
> +static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
> +{
> +     struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
> +
> +     /* Enable all cores. */
> +     glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present;
> +
> +     /* Setup timers. */
> +     glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US);
> +     glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
> +     glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US);
> +
> +     /* Enable interrupts we care about. */
> +     glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN |
> +                                      GLB_PING |
> +                                      GLB_CFG_PROGRESS_TIMER |
> +                                      GLB_CFG_POWEROFF_TIMER |
> +                                      GLB_IDLE_EN |
> +                                      GLB_IDLE;
> +
> +     panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN);
> +     panthor_fw_toggle_reqs(glb_iface, req, ack,
> +                            GLB_CFG_ALLOC_EN |
> +                            GLB_CFG_POWEROFF_TIMER |
> +                            GLB_CFG_PROGRESS_TIMER);
> +
> +     gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
> +
> +     /* Kick the watchdog. */
> +     mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work,
> +                      msecs_to_jiffies(PING_INTERVAL_MS));
> +}
> +
> +static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
> +{
> +     if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
> +             ptdev->fw->booted = true;
> +
> +     wake_up_all(&ptdev->fw->req_waitqueue);
> +
> +     /* If the FW is not booted, don't process IRQs, just flag the FW as booted. */
> +     if (!ptdev->fw->booted)
> +             return;
> +
> +     panthor_sched_report_fw_events(ptdev, status);
> +}
> +PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler);
> +
> +static int panthor_fw_start(struct panthor_device *ptdev)
> +{
> +     bool timedout = false;
> +
> +     ptdev->fw->booted = false;
> +     panthor_job_irq_resume(&ptdev->fw->irq, ~0);
> +     gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);
> +
> +     if (!wait_event_timeout(ptdev->fw->req_waitqueue,
> +                             ptdev->fw->booted,
> +                             msecs_to_jiffies(1000))) {
> +             if (!ptdev->fw->booted &&
> +                 !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF))
> +                     timedout = true;
> +     }
> +
> +     if (timedout) {
> +             drm_err(&ptdev->base, "Failed to boot MCU");
> +             return -ETIMEDOUT;
> +     }
> +
> +     return 0;
> +}
> +
> +static void panthor_fw_stop(struct panthor_device *ptdev)
> +{
> +     u32 status;
> +
> +     gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
> +     if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
> +                            status == MCU_STATUS_DISABLED, 10, 100000))
> +             drm_err(&ptdev->base, "Failed to stop MCU");
> +}
> +
> +/**
> + * panthor_fw_pre_reset() - Call before a reset.
> + * @ptdev: Device.
> + * @on_hang: true if the reset was triggered on a GPU hang.
> + *
> + * If the reset is not triggered on a hang, we try to gracefully halt the
> + * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called.
> + */
> +void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
> +{
> +     /* Make sure we won't be woken up by a ping. */
> +     cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
> +
> +     ptdev->fw->fast_reset = false;
> +
> +     if (!on_hang) {
> +             struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
> +             u32 status;
> +
> +             panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
> +             gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
> +             if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
> +                                     status == MCU_STATUS_HALT, 10, 100000) &&
> +                 glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) {
> +                     ptdev->fw->fast_reset = true;
> +             } else {
> +                     drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
> +             }
> +
> +             /* The FW detects 0 -> 1 transitions. Make sure we reset
> +              * the HALT bit before the FW is rebooted.
> +              */
> +             panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
> +     }
> +
> +     panthor_job_irq_suspend(&ptdev->fw->irq);
> +}
> +
> +/**
> + * panthor_fw_post_reset() - Call after a reset.
> + * @ptdev: Device.
> + *
> + * Start the FW. If this is not a fast reset, all FW sections are reloaded to
> + * make sure we can recover from a memory corruption.
> + */
> +int panthor_fw_post_reset(struct panthor_device *ptdev)
> +{
> +     int ret;
> +
> +     /* Make the MCU VM active. */
> +     ret = panthor_vm_active(ptdev->fw->vm);
> +     if (ret)
> +             return ret;
> +
> +     /* Reload all sections, including RO ones. We're not supposed
> +      * to end up here anyway, let's just assume the overhead of
> +      * reloading everything is acceptable.
> +      */
> +     if (!ptdev->fw->fast_reset)
> +             panthor_reload_fw_sections(ptdev, true);
> +
> +     ret = panthor_fw_start(ptdev);
> +     if (ret)
> +             return ret;
> +
> +     /* We must re-initialize the global interface even on fast-reset. */
> +     panthor_fw_init_global_iface(ptdev);
> +     return 0;
> +}
> +
> +/**
> + * panthor_fw_unplug() - Called when the device is unplugged.
> + * @ptdev: Device.
> + *
> + * This function must make sure all pending operations are flushed before
> + * it releases the device resources, thus preventing any interaction with
> + * the HW.
> + *
> + * If there is still FW-related work running after this function returns,
> + * it must use drm_dev_{enter,exit}() and skip any HW access when
> + * drm_dev_enter() returns false.
> + */
> +void panthor_fw_unplug(struct panthor_device *ptdev)
> +{
> +     struct panthor_fw_section *section;
> +
> +     cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
> +
> +     /* Make sure the IRQ handler can't be called after that point. */
> +     if (ptdev->fw->irq.irq)
> +             panthor_job_irq_suspend(&ptdev->fw->irq);
> +
> +     panthor_fw_stop(ptdev);
> +
> +     if (ptdev->fw->vm)
> +             panthor_vm_idle(ptdev->fw->vm);
> +
> +     list_for_each_entry(section, &ptdev->fw->sections, node)
> +             panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), section->mem);
> +
> +     panthor_vm_put(ptdev->fw->vm);
> +
> +     panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
> +}
> +
> +/**
> + * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
> + * @req_ptr: Pointer to the req register.
> + * @ack_ptr: Pointer to the ack register.
> + * @wq: Wait queue to use for the sleeping wait.
> + * @req_mask: Mask of requests to wait for.
> + * @acked: Pointer to field that's updated with the acked requests.
> + * If the function returns 0, *acked == req_mask.
> + * @timeout_ms: Timeout expressed in milliseconds.
> + *
> + * Return: 0 on success, -ETIMEDOUT otherwise.
> + */
> +static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
> +                             wait_queue_head_t *wq,
> +                             u32 req_mask, u32 *acked,
> +                             u32 timeout_ms)
> +{
> +     u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
> +     int ret;
> +
> +     /* Busy wait for a few µsecs before falling back to a sleeping wait. */
> +     *acked = req_mask;
> +     ret = read_poll_timeout_atomic(READ_ONCE, ack,
> +                                    (ack & req_mask) == req,
> +                                    0, 10, 0,
> +                                    *ack_ptr);
> +     if (!ret)
> +             return 0;
> +
> +     if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
> +                            msecs_to_jiffies(timeout_ms)))
> +             return 0;
> +
> +     /* Check one last time, in case we were not woken up for some reason. */
> +     ack = READ_ONCE(*ack_ptr);
> +     if ((ack & req_mask) == req)
> +             return 0;
> +
> +     *acked = ~(req ^ ack) & req_mask;
> +     return -ETIMEDOUT;
> +}
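
Not a defect, just spelling out the *acked computation for other reviewers:
requests are toggles, so a request bit is acknowledged once req and ack match
again, which makes ~(req ^ ack) & req_mask exactly the set of acked bits. For
example, with req_mask = 0x3, req = 0x3 and ack = 0x1, *acked ends up as 0x1:
bit 0 was acked, bit 1 timed out.
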
> +
> +/**
> + * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
> + * @ptdev: Device.
> + * @req_mask: Mask of requests to wait for.
> + * @acked: Pointer to field that's updated with the acked requests.
> + * If the function returns 0, *acked == req_mask.
> + * @timeout_ms: Timeout expressed in milliseconds.
> + *
> + * Return: 0 on success, -ETIMEDOUT otherwise.
> + */
> +int panthor_fw_glb_wait_acks(struct panthor_device *ptdev,
> +                          u32 req_mask, u32 *acked,
> +                          u32 timeout_ms)
> +{
> +     struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
> +
> +     /* GLB_HALT doesn't get acked through the FW interface. */
> +     if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT)))
> +             return -EINVAL;
> +
> +     return panthor_fw_wait_acks(&glb_iface->input->req,
> +                                 &glb_iface->output->ack,
> +                                 &ptdev->fw->req_waitqueue,
> +                                 req_mask, acked, timeout_ms);
> +}
> +
> +/**
> + * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged.
> + * @ptdev: Device.
> + * @csg_slot: CSG slot ID.
> + * @req_mask: Mask of requests to wait for.
> + * @acked: Pointer to field that's updated with the acked requests.
> + * If the function returns 0, *acked == req_mask.
> + * @timeout_ms: Timeout expressed in milliseconds.
> + *
> + * Return: 0 on success, -ETIMEDOUT otherwise.
> + */
> +int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot,
> +                          u32 req_mask, u32 *acked, u32 timeout_ms)
> +{
> +     struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot);
> +     int ret;
> +
> +     if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK))
> +             return -EINVAL;
> +
> +     ret = panthor_fw_wait_acks(&csg_iface->input->req,
> +                                &csg_iface->output->ack,
> +                                &ptdev->fw->req_waitqueue,
> +                                req_mask, acked, timeout_ms);
> +
> +     /*
> +      * Check that all bits in the state field were updated; if there is any
> +      * mismatch, clear all bits in the state field. This allows code to do
> +      * (acked & CSG_STATE_MASK) and get the right value.
> +      */
> +
> +     if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK)
> +             *acked &= ~CSG_STATE_MASK;
> +
> +     return ret;
> +}
> +
> +/**
> + * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells.
> + * @ptdev: Device.
> + * @csg_mask: Bitmask encoding the command stream group doorbells to ring.
> + *
> + * This function toggles bits in the doorbell_req register and rings the
> + * global doorbell. It doesn't require a user doorbell to be attached to
> + * the group.
> + */
> +void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask)
> +{
> +     struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
> +
> +     panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask);
> +     gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
> +}
> +
> +static void panthor_fw_ping_work(struct work_struct *work)
> +{
> +     struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work);
> +     struct panthor_device *ptdev = fw->irq.ptdev;
> +     struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
> +     u32 acked;
> +     int ret;
> +
> +     if (panthor_device_reset_is_pending(ptdev))
> +             return;
> +
> +     panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
> +     gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
> +
> +     ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
> +     if (ret) {
> +             panthor_device_schedule_reset(ptdev);
> +             drm_err(&ptdev->base, "FW ping timeout, scheduling a reset");
> +     } else {
> +             mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work,
> +                              msecs_to_jiffies(PING_INTERVAL_MS));
> +     }
> +}
> +
> +/**
> + * panthor_fw_init() - Initialize FW related data.
> + * @ptdev: Device.
> + *
> + * Return: 0 on success, a negative error code otherwise.
> + */
> +int panthor_fw_init(struct panthor_device *ptdev)
> +{
> +     struct panthor_fw *fw;
> +     int ret, irq;
> +
> +     fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL);
> +     if (!fw)
> +             return -ENOMEM;
> +
> +     ptdev->fw = fw;
> +     init_waitqueue_head(&fw->req_waitqueue);
> +     INIT_LIST_HEAD(&fw->sections);
> +     INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work);
> +
> +     irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job");
> +     if (irq <= 0)
> +             return -ENODEV;
> +
> +     ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0);
> +     if (ret) {
> +             drm_err(&ptdev->base, "failed to request job irq");
> +             return ret;
> +     }
> +
> +     ret = panthor_gpu_l2_power_on(ptdev);
> +     if (ret)
> +             return ret;
> +
> +     fw->vm = panthor_vm_create(ptdev, true,
> +                                0, SZ_4G,
> +                                CSF_MCU_SHARED_REGION_START,
> +                                CSF_MCU_SHARED_REGION_SIZE);
> +     if (IS_ERR(fw->vm)) {
> +             ret = PTR_ERR(fw->vm);
> +             fw->vm = NULL;
> +             goto err_unplug_fw;
> +     }
> +
> +     ret = panthor_fw_load(ptdev);
> +     if (ret)
> +             goto err_unplug_fw;
> +
> +     ret = panthor_vm_active(fw->vm);
> +     if (ret)
> +             goto err_unplug_fw;
> +
> +     ret = panthor_fw_start(ptdev);
> +     if (ret)
> +             goto err_unplug_fw;
> +
> +     ret = panthor_fw_init_ifaces(ptdev);
> +     if (ret)
> +             goto err_unplug_fw;
> +
> +     panthor_fw_init_global_iface(ptdev);
> +     return 0;
> +
> +err_unplug_fw:
> +     panthor_fw_unplug(ptdev);
> +     return ret;
> +}
> diff --git a/drivers/gpu/drm/panthor/panthor_fw.h b/drivers/gpu/drm/panthor/panthor_fw.h
> new file mode 100644
> index 000000000000..1126b1ea199a
> --- /dev/null
> +++ b/drivers/gpu/drm/panthor/panthor_fw.h
> @@ -0,0 +1,504 @@
> +/* SPDX-License-Identifier: GPL-2.0 or MIT */
> +/* Copyright 2023 Collabora ltd. */
> +
> +#ifndef __PANTHOR_MCU_H__
> +#define __PANTHOR_MCU_H__
> +
> +#include <linux/types.h>
> +
> +#include "panthor_device.h"
> +
> +struct panthor_kernel_bo;
> +
> +#define MAX_CSGS                             31
> +#define MAX_CS_PER_CSG                          32
> +
> +struct panthor_fw_ringbuf_input_iface {
> +     u64 insert;
> +     u64 extract;
> +};
> +
> +struct panthor_fw_ringbuf_output_iface {
> +     u64 extract;
> +     u32 active;
> +};
> +
> +struct panthor_fw_cs_control_iface {
> +#define CS_FEATURES_WORK_REGS(x)             (((x) & GENMASK(7, 0)) + 1)
> +#define CS_FEATURES_SCOREBOARDS(x)           (((x) & GENMASK(15, 8)) >> 8)
> +#define CS_FEATURES_COMPUTE                  BIT(16)
> +#define CS_FEATURES_FRAGMENT                 BIT(17)
> +#define CS_FEATURES_TILER                    BIT(18)
> +     u32 features;
> +     u32 input_va;
> +     u32 output_va;
> +};
> +
> +struct panthor_fw_cs_input_iface {
> +#define CS_STATE_MASK                                GENMASK(2, 0)
> +#define CS_STATE_STOP                                0
> +#define CS_STATE_START                               1
> +#define CS_EXTRACT_EVENT                     BIT(4)
> +#define CS_IDLE_SYNC_WAIT                    BIT(8)
> +#define CS_IDLE_PROTM_PENDING                        BIT(9)
> +#define CS_IDLE_EMPTY                                BIT(10)
> +#define CS_IDLE_RESOURCE_REQ                 BIT(11)
> +#define CS_TILER_OOM                         BIT(26)
> +#define CS_PROTM_PENDING                     BIT(27)
> +#define CS_FATAL                             BIT(30)
> +#define CS_FAULT                             BIT(31)
> +#define CS_REQ_MASK                          (CS_STATE_MASK | \
> +                                              CS_EXTRACT_EVENT | \
> +                                              CS_IDLE_SYNC_WAIT | \
> +                                              CS_IDLE_PROTM_PENDING | \
> +                                              CS_IDLE_EMPTY | \
> +                                              CS_IDLE_RESOURCE_REQ)
> +#define CS_EVT_MASK                          (CS_TILER_OOM | \
> +                                              CS_PROTM_PENDING | \
> +                                              CS_FATAL | \
> +                                              CS_FAULT)
> +     u32 req;
> +
> +#define CS_CONFIG_PRIORITY(x)                        ((x) & GENMASK(3, 0))
> +#define CS_CONFIG_DOORBELL(x)                        (((x) << 8) & GENMASK(15, 8))
> +     u32 config;
> +     u32 reserved1;
> +     u32 ack_irq_mask;
> +     u64 ringbuf_base;
> +     u32 ringbuf_size;
> +     u32 reserved2;
> +     u64 heap_start;
> +     u64 heap_end;
> +     u64 ringbuf_input;
> +     u64 ringbuf_output;
> +     u32 instr_config;
> +     u32 instrbuf_size;
> +     u64 instrbuf_base;
> +     u64 instrbuf_offset_ptr;
> +};
> +
> +struct panthor_fw_cs_output_iface {
> +     u32 ack;
> +     u32 reserved1[15];
> +     u64 status_cmd_ptr;
> +
> +#define CS_STATUS_WAIT_SB_MASK                       GENMASK(15, 0)
> +#define CS_STATUS_WAIT_SB_SRC_MASK           GENMASK(19, 16)
> +#define CS_STATUS_WAIT_SB_SRC_NONE           (0 << 16)
> +#define CS_STATUS_WAIT_SB_SRC_WAIT           (8 << 16)
> +#define CS_STATUS_WAIT_SYNC_COND_LE          (0 << 24)
> +#define CS_STATUS_WAIT_SYNC_COND_GT          (1 << 24)
> +#define CS_STATUS_WAIT_SYNC_COND_MASK                GENMASK(27, 24)
> +#define CS_STATUS_WAIT_PROGRESS                      BIT(28)
> +#define CS_STATUS_WAIT_PROTM                 BIT(29)
> +#define CS_STATUS_WAIT_SYNC_64B                      BIT(30)
> +#define CS_STATUS_WAIT_SYNC                  BIT(31)
> +     u32 status_wait;
> +     u32 status_req_resource;
> +     u64 status_wait_sync_ptr;
> +     u32 status_wait_sync_value;
> +     u32 status_scoreboards;
> +
> +#define CS_STATUS_BLOCKED_REASON_UNBLOCKED   0
> +#define CS_STATUS_BLOCKED_REASON_SB_WAIT     1
> +#define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT       2
> +#define CS_STATUS_BLOCKED_REASON_SYNC_WAIT   3
> +#define CS_STATUS_BLOCKED_REASON_DEFERRED    5
> +#define CS_STATUS_BLOCKED_REASON_RES         6
> +#define CS_STATUS_BLOCKED_REASON_FLUSH               7
> +#define CS_STATUS_BLOCKED_REASON_MASK                GENMASK(3, 0)
> +     u32 status_blocked_reason;
> +     u32 status_wait_sync_value_hi;
> +     u32 reserved2[6];
> +
> +#define CS_EXCEPTION_TYPE(x)                 ((x) & GENMASK(7, 0))
> +#define CS_EXCEPTION_DATA(x)                 (((x) >> 8) & GENMASK(23, 0))
> +     u32 fault;
> +     u32 fatal;
> +     u64 fault_info;
> +     u64 fatal_info;
> +     u32 reserved3[10];
> +     u32 heap_vt_start;
> +     u32 heap_vt_end;
> +     u32 reserved4;
> +     u32 heap_frag_end;
> +     u64 heap_address;
> +};
> +
> +struct panthor_fw_csg_control_iface {
> +     u32 features;
> +     u32 input_va;
> +     u32 output_va;
> +     u32 suspend_size;
> +     u32 protm_suspend_size;
> +     u32 stream_num;
> +     u32 stream_stride;
> +};
> +
> +struct panthor_fw_csg_input_iface {
> +#define CSG_STATE_MASK                               GENMASK(2, 0)
> +#define CSG_STATE_TERMINATE                  0
> +#define CSG_STATE_START                              1
> +#define CSG_STATE_SUSPEND                    2
> +#define CSG_STATE_RESUME                     3
> +#define CSG_ENDPOINT_CONFIG                  BIT(4)
> +#define CSG_STATUS_UPDATE                    BIT(5)
> +#define CSG_SYNC_UPDATE                              BIT(28)
> +#define CSG_IDLE                             BIT(29)
> +#define CSG_DOORBELL                         BIT(30)
> +#define CSG_PROGRESS_TIMER_EVENT             BIT(31)
> +#define CSG_REQ_MASK                         (CSG_STATE_MASK | \
> +                                              CSG_ENDPOINT_CONFIG | \
> +                                              CSG_STATUS_UPDATE)
> +#define CSG_EVT_MASK                         (CSG_SYNC_UPDATE | \
> +                                              CSG_IDLE | \
> +                                              CSG_PROGRESS_TIMER_EVENT)
> +     u32 req;
> +     u32 ack_irq_mask;
> +
> +     u32 doorbell_req;
> +     u32 cs_irq_ack;
> +     u32 reserved1[4];
> +     u64 allow_compute;
> +     u64 allow_fragment;
> +     u32 allow_other;
> +
> +#define CSG_EP_REQ_COMPUTE(x)                        ((x) & GENMASK(7, 0))
> +#define CSG_EP_REQ_FRAGMENT(x)                       (((x) << 8) & GENMASK(15, 8))
> +#define CSG_EP_REQ_TILER(x)                  (((x) << 16) & GENMASK(19, 16))
> +#define CSG_EP_REQ_EXCL_COMPUTE                      BIT(20)
> +#define CSG_EP_REQ_EXCL_FRAGMENT             BIT(21)
> +#define CSG_EP_REQ_PRIORITY(x)                       (((x) << 28) & GENMASK(31, 28))
> +#define CSG_EP_REQ_PRIORITY_MASK             GENMASK(31, 28)
> +     u32 endpoint_req;
> +     u32 reserved2[2];
> +     u64 suspend_buf;
> +     u64 protm_suspend_buf;
> +     u32 config;
> +     u32 iter_trace_config;
> +};
> +
> +struct panthor_fw_csg_output_iface {
> +     u32 ack;
> +     u32 reserved1;
> +     u32 doorbell_ack;
> +     u32 cs_irq_req;
> +     u32 status_endpoint_current;
> +     u32 status_endpoint_req;
> +
> +#define CSG_STATUS_STATE_IS_IDLE             BIT(0)
> +     u32 status_state;
> +     u32 resource_dep;
> +};
> +
> +struct panthor_fw_global_control_iface {
> +     u32 version;
> +     u32 features;
> +     u32 input_va;
> +     u32 output_va;
> +     u32 group_num;
> +     u32 group_stride;
> +     u32 perfcnt_size;
> +     u32 instr_features;
> +};
> +
> +struct panthor_fw_global_input_iface {
> +#define GLB_HALT                             BIT(0)
> +#define GLB_CFG_PROGRESS_TIMER                       BIT(1)
> +#define GLB_CFG_ALLOC_EN                     BIT(2)
> +#define GLB_CFG_POWEROFF_TIMER                       BIT(3)
> +#define GLB_PROTM_ENTER                              BIT(4)
> +#define GLB_PERFCNT_EN                               BIT(5)
> +#define GLB_PERFCNT_SAMPLER                  BIT(6)
> +#define GLB_COUNTER_EN                               BIT(7)
> +#define GLB_PING                             BIT(8)
> +#define GLB_FWCFG_UPDATE                     BIT(9)
> +#define GLB_IDLE_EN                          BIT(10)
> +#define GLB_SLEEP                            BIT(12)
> +#define GLB_INACTIVE_COMPUTE                 BIT(20)
> +#define GLB_INACTIVE_FRAGMENT                        BIT(21)
> +#define GLB_INACTIVE_TILER                   BIT(22)
> +#define GLB_PROTM_EXIT                               BIT(23)
> +#define GLB_PERFCNT_THRESHOLD                        BIT(24)
> +#define GLB_PERFCNT_OVERFLOW                 BIT(25)
> +#define GLB_IDLE                             BIT(26)
> +#define GLB_DBG_CSF                          BIT(30)
> +#define GLB_DBG_HOST                         BIT(31)
> +#define GLB_REQ_MASK                         GENMASK(10, 0)
> +#define GLB_EVT_MASK                         GENMASK(26, 20)
> +     u32 req;
> +     u32 ack_irq_mask;
> +     u32 doorbell_req;
> +     u32 reserved1;
> +     u32 progress_timer;
> +
> +#define GLB_TIMER_VAL(x)                     ((x) & GENMASK(30, 0))
> +#define GLB_TIMER_SOURCE_GPU_COUNTER         BIT(31)
> +     u32 poweroff_timer;
> +     u64 core_en_mask;
> +     u32 reserved2;
> +     u32 perfcnt_as;
> +     u64 perfcnt_base;
> +     u32 perfcnt_extract;
> +     u32 reserved3[3];
> +     u32 perfcnt_config;
> +     u32 perfcnt_csg_select;
> +     u32 perfcnt_fw_enable;
> +     u32 perfcnt_csg_enable;
> +     u32 perfcnt_csf_enable;
> +     u32 perfcnt_shader_enable;
> +     u32 perfcnt_tiler_enable;
> +     u32 perfcnt_mmu_l2_enable;
> +     u32 reserved4[8];
> +     u32 idle_timer;
> +};
> +
> +enum panthor_fw_halt_status {
> +     PANTHOR_FW_HALT_OK = 0,
> +     PANTHOR_FW_HALT_ON_PANIC = 0x4e,
> +     PANTHOR_FW_HALT_ON_WATCHDOG_EXPIRATION = 0x4f,
> +};
> +
> +struct panthor_fw_global_output_iface {
> +     u32 ack;
> +     u32 reserved1;
> +     u32 doorbell_ack;
> +     u32 reserved2;
> +     u32 halt_status;
> +     u32 perfcnt_status;
> +     u32 perfcnt_insert;
> +};
> +
> +/**
> + * struct panthor_fw_cs_iface - Firmware command stream slot interface
> + */
> +struct panthor_fw_cs_iface {
> +     /**
> +      * @lock: Lock protecting access to the panthor_fw_cs_input_iface::req
> +      * field.
> +      *
> +      * Needed so we can update the req field concurrently from the interrupt
> +      * handler and the scheduler logic.
> +      *
> +      * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
> +      * interface sections are mapped uncached/write-combined right now, and
> +      * using cmpxchg() on such mappings leads to SError faults. Revisit when
> +      * we have 'SHARED' GPU mappings hooked up.
> +      */
> +     spinlock_t lock;
> +
> +     /**
> +      * @control: Command stream slot control interface.
> +      *
> +      * Used to expose command stream slot properties.
> +      *
> +      * This interface is read-only.
> +      */
> +     struct panthor_fw_cs_control_iface *control;
> +
> +     /**
> +      * @input: Command stream slot input interface.
> +      *
> +      * Used for host updates/events.
> +      */
> +     struct panthor_fw_cs_input_iface *input;
> +
> +     /**
> +      * @output: Command stream slot output interface.
> +      *
> +      * Used for FW updates/events.
> +      *
> +      * This interface is read-only.
> +      */
> +     const struct panthor_fw_cs_output_iface *output;
> +};
> +
> +/**
> + * struct panthor_fw_csg_iface - Firmware command stream group slot interface
> + */
> +struct panthor_fw_csg_iface {
> +     /**
> +      * @lock: Lock protecting access to the panthor_fw_csg_input_iface::req
> +      * field.
> +      *
> +      * Needed so we can update the req field concurrently from the interrupt
> +      * handler and the scheduler logic.
> +      *
> +      * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
> +      * interface sections are mapped uncached/write-combined right now, and
> +      * using cmpxchg() on such mappings leads to SError faults. Revisit when
> +      * we have 'SHARED' GPU mappings hooked up.
> +      */
> +     spinlock_t lock;
> +
> +     /**
> +      * @control: Command stream group slot control interface.
> +      *
> +      * Used to expose command stream group slot properties.
> +      *
> +      * This interface is read-only.
> +      */
> +     const struct panthor_fw_csg_control_iface *control;
> +
> +     /**
> +      * @input: Command stream slot input interface.
> +      *
> +      * Used for host updates/events.
> +      */
> +     struct panthor_fw_csg_input_iface *input;
> +
> +     /**
> +      * @output: Command stream group slot output interface.
> +      *
> +      * Used for FW updates/events.
> +      *
> +      * This interface is read-only.
> +      */
> +     const struct panthor_fw_csg_output_iface *output;
> +};
> +
> +/**
> + * struct panthor_fw_global_iface - Firmware global interface
> + */
> +struct panthor_fw_global_iface {
> +     /**
> +      * @lock: Lock protecting access to the panthor_fw_global_input_iface::req
> +      * field.
> +      *
> +      * Needed so we can update the req field concurrently from the interrupt
> +      * handler and the scheduler/FW management logic.
> +      *
> +      * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
> +      * interface sections are mapped uncached/write-combined right now, and
> +      * using cmpxchg() on such mappings leads to SError faults. Revisit when
> +      * we have 'SHARED' GPU mappings hooked up.
> +      */
> +     spinlock_t lock;
> +
> +     /**
> +      * @control: Command stream group slot control interface.
> +      *
> +      * Used to expose global FW properties.
> +      *
> +      * This interface is read-only.
> +      */
> +     const struct panthor_fw_global_control_iface *control;
> +
> +     /**
> +      * @input: Global input interface.
> +      *
> +      * Used for host updates/events.
> +      */
> +     struct panthor_fw_global_input_iface *input;
> +
> +     /**
> +      * @output: Global output interface.
> +      *
> +      * Used for FW updates/events.
> +      *
> +      * This interface is read-only.
> +      */
> +     const struct panthor_fw_global_output_iface *output;
> +};
> +
> +/**
> + * panthor_fw_toggle_reqs() - Toggle acknowledge bits to send an event to the FW
> + * @__iface: The interface to operate on.
> + * @__in_reg: Name of the register to update in the input section of the
> + * interface.
> + * @__out_reg: Name of the register to take as a reference in the output
> + * section of the interface.
> + * interface.
> + * @__mask: Mask to apply to the update.
> + *
> + * The Host -> FW event/message passing was designed to be lockless, with
> + * each side of the channel having its writeable section. Events are
> + * signaled as a difference between the host and FW side in the req/ack
> + * registers (when a bit differs, there's an event pending, when they are
> + * the same, nothing needs attention).
> + *
> + * This helper allows one to update the req register based on the current
> + * value of the ack register managed by the FW. Toggling a specific bit
> + * will flag an event. In order for events to be re-evaluated, the
> + * interface doorbell needs to be rung.
> + *
> + * Concurrent accesses to the same req register are covered.
> + *
> + * Anything requiring atomic updates to multiple registers requires a
> + * dedicated lock.
> + */
> +#define panthor_fw_toggle_reqs(__iface, __in_reg, __out_reg, __mask) \
> +     do { \
> +             u32 __cur_val, __new_val, __out_val; \
> +             spin_lock(&(__iface)->lock); \
> +             __cur_val = READ_ONCE((__iface)->input->__in_reg); \
> +             __out_val = READ_ONCE((__iface)->output->__out_reg); \
> +             __new_val = ((__out_val ^ (__mask)) & (__mask)) | (__cur_val & ~(__mask)); \
> +             WRITE_ONCE((__iface)->input->__in_reg, __new_val); \
> +             spin_unlock(&(__iface)->lock); \
> +     } while (0)
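
As a worked example for reviewers less familiar with the CSF interface, the
ping path in panthor_fw.c uses exactly this helper (names as in this patch):

	/* Toggle GLB_PING in req so it differs from ack, then ring the
	 * global doorbell so the FW re-evaluates the req/ack delta. */
	panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);

	/* The FW acknowledges by copying the toggled bit into ack, at which
	 * point (req ^ ack) & GLB_PING == 0 again. */
	ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
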
> +
> +/**
> + * panthor_fw_update_reqs() - Update bits to reflect a configuration change
> + * @__iface: The interface to operate on.
> + * @__in_reg: Name of the register to update in the input section of the
> + * interface.
> + * @__val: Value to set.
> + * @__mask: Mask to apply to the update.
> + *
> + * Some configuration values get passed through req registers that are also
> + * used to send events to the FW. Since those req registers are also updated
> + * from the interrupt handler, special helpers are needed to update the
> + * configuration part as well.
> + *
> + * Concurrent accesses to the same req register are covered.
> + *
> + * Anything requiring atomic updates to multiple registers requires a
> + * dedicated lock.
> + */
> +#define panthor_fw_update_reqs(__iface, __in_reg, __val, __mask) \
> +     do { \
> +             u32 __cur_val, __new_val; \
> +             spin_lock(&(__iface)->lock); \
> +             __cur_val = READ_ONCE((__iface)->input->__in_reg); \
> +             __new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \
> +             WRITE_ONCE((__iface)->input->__in_reg, __new_val); \
> +             spin_unlock(&(__iface)->lock); \
> +     } while (0)
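
Likewise, the halt sequence in panthor_fw_pre_reset() is a good example of
the configuration-style usage of this helper (again, names from this patch):

	/* Request a halt by setting GLB_HALT and ringing the doorbell... */
	panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);

	/* ...and clear the bit again once the MCU has halted, since the FW
	 * only detects 0 -> 1 transitions of GLB_HALT. */
	panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
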
> +
> +struct panthor_fw_global_iface *
> +panthor_fw_get_glb_iface(struct panthor_device *ptdev);
> +
> +struct panthor_fw_csg_iface *
> +panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot);
> +
> +struct panthor_fw_cs_iface *
> +panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot);
> +
> +int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask,
> +                          u32 *acked, u32 timeout_ms);
> +
> +int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, u32 req_mask, u32 *acked,
> +                          u32 timeout_ms);
> +
> +void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_slot);
> +
> +struct panthor_kernel_bo *
> +panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
> +                              struct panthor_fw_ringbuf_input_iface **input,
> +                              const struct panthor_fw_ringbuf_output_iface **output,
> +                              u32 *input_fw_va, u32 *output_fw_va);
> +struct panthor_kernel_bo *
> +panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size);
> +
> +struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev);
> +
> +void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang);
> +int panthor_fw_post_reset(struct panthor_device *ptdev);
> +
> +static inline void panthor_fw_suspend(struct panthor_device *ptdev)
> +{
> +     panthor_fw_pre_reset(ptdev, false);
> +}
> +
> +static inline int panthor_fw_resume(struct panthor_device *ptdev)
> +{
> +     return panthor_fw_post_reset(ptdev);
> +}
> +
> +int panthor_fw_init(struct panthor_device *ptdev);
> +void panthor_fw_unplug(struct panthor_device *ptdev);
> +
> +#endif
> -- 
> 2.43.0
> 

-- 
====================
| I would like to |
| fix the world,  |
| but they're not |
| giving me the   |
 \ source code!  /
  ---------------
    ¯\_(ツ)_/¯
