Am 2021-05-05 um 9:51 a.m. schrieb Eric Huang:
> On an NPS4 BIOS we need to find the closest NUMA node when creating
> the topology IO link between CPU and GPU, if the PCI driver doesn't
> set it.
>
> Signed-off-by: Eric Huang <jinhuieric.hu...@amd.com>

Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>


> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 91 +++++++++++++++++++++++++++
>  1 file changed, 91 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 38d45711675f..0972b1014d6f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -1759,6 +1759,92 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
>       return 0;
>  }
>  
> +#ifdef CONFIG_ACPI_NUMA
> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
> +{
> +     struct acpi_table_header *table_header = NULL;
> +     struct acpi_subtable_header *sub_header = NULL;
> +     unsigned long table_end, subtable_len;
> +     u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
> +                     pci_dev_id(kdev->pdev);
> +     u32 bdf;
> +     acpi_status status;
> +     struct acpi_srat_cpu_affinity *cpu;
> +     struct acpi_srat_generic_affinity *gpu;
> +     int pxm = 0, max_pxm = 0;
> +     int numa_node = NUMA_NO_NODE;
> +     bool found = false;
> +
> +     /* Fetch the SRAT table from ACPI */
> +     status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
> +     if (status == AE_NOT_FOUND) {
> +             pr_warn("SRAT table not found\n");
> +             return;
> +     } else if (ACPI_FAILURE(status)) {
> +             const char *err = acpi_format_exception(status);
> +             pr_err("SRAT table error: %s\n", err);
> +             return;
> +     }
> +
> +     table_end = (unsigned long)table_header + table_header->length;
> +
> +     /* Parse all entries looking for a match. */
> +     sub_header = (struct acpi_subtable_header *)
> +                     ((unsigned long)table_header +
> +                     sizeof(struct acpi_table_srat));
> +     subtable_len = sub_header->length;
> +
> +     while (((unsigned long)sub_header) + subtable_len  < table_end) {
> +             /*
> +              * If length is 0, break from this loop to avoid
> +              * infinite loop.
> +              */
> +             if (subtable_len == 0) {
> +                     pr_err("SRAT invalid zero length\n");
> +                     break;
> +             }
> +
> +             switch (sub_header->type) {
> +             case ACPI_SRAT_TYPE_CPU_AFFINITY:
> +                     cpu = (struct acpi_srat_cpu_affinity *)sub_header;
> +                     pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
> +                                     cpu->proximity_domain_lo;
> +                     if (pxm > max_pxm)
> +                             max_pxm = pxm;
> +                     break;
> +             case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
> +                     gpu = (struct acpi_srat_generic_affinity *)sub_header;
> +                     bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
> +                                     *((u16 *)(&gpu->device_handle[2]));
> +                     if (bdf == pci_id) {
> +                             found = true;
> +                             numa_node = pxm_to_node(gpu->proximity_domain);
> +                     }
> +                     break;
> +             default:
> +                     break;
> +             }
> +
> +             if (found)
> +                     break;
> +
> +             sub_header = (struct acpi_subtable_header *)
> +                             ((unsigned long)sub_header + subtable_len);
> +             subtable_len = sub_header->length;
> +     }
> +
> +     acpi_put_table(table_header);
> +
> +     /* Workaround bad cpu-gpu binding case */
> +     if (found && (numa_node < 0 ||
> +                     numa_node > pxm_to_node(max_pxm)))
> +             numa_node = 0;
> +
> +     if (numa_node != NUMA_NO_NODE)
> +             set_dev_node(&kdev->pdev->dev, numa_node);
> +}
> +#endif
> +
>  /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
>   * to its NUMA node
>   *   @avail_size: Available size in the memory
> @@ -1804,6 +1890,11 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>       }
>  
>       sub_type_hdr->proximity_domain_from = proximity_domain;
> +
> +#ifdef CONFIG_ACPI_NUMA
> +     if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
> +             kfd_find_numa_node_in_srat(kdev);
> +#endif
>  #ifdef CONFIG_NUMA
>       if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
>               sub_type_hdr->proximity_domain_to = 0;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to