Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

2021-05-05 Thread Felix Kuehling
Am 2021-05-05 um 9:51 a.m. schrieb Eric Huang:
> In NPS4 BIOS we need to find the closest numa node when creating
> topology io link between cpu and gpu, if PCI driver doesn't set
> it.
>
> Signed-off-by: Eric Huang 

Reviewed-by: Felix Kuehling 


> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 91 +++
>  1 file changed, 91 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 38d45711675f..0972b1014d6f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -1759,6 +1759,92 @@ static int kfd_fill_gpu_memory_affinity(int 
> *avail_size,
>   return 0;
>  }
>  
> +#ifdef CONFIG_ACPI_NUMA
> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
> +{
> + struct acpi_table_header *table_header = NULL;
> + struct acpi_subtable_header *sub_header = NULL;
> + unsigned long table_end, subtable_len;
> + u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
> + pci_dev_id(kdev->pdev);
> + u32 bdf;
> + acpi_status status;
> + struct acpi_srat_cpu_affinity *cpu;
> + struct acpi_srat_generic_affinity *gpu;
> + int pxm = 0, max_pxm = 0;
> + int numa_node = NUMA_NO_NODE;
> + bool found = false;
> +
> + /* Fetch the SRAT table from ACPI */
> + status = acpi_get_table(ACPI_SIG_SRAT, 0, _header);
> + if (status == AE_NOT_FOUND) {
> + pr_warn("SRAT table not found\n");
> + return;
> + } else if (ACPI_FAILURE(status)) {
> + const char *err = acpi_format_exception(status);
> + pr_err("SRAT table error: %s\n", err);
> + return;
> + }
> +
> + table_end = (unsigned long)table_header + table_header->length;
> +
> + /* Parse all entries looking for a match. */
> + sub_header = (struct acpi_subtable_header *)
> + ((unsigned long)table_header +
> + sizeof(struct acpi_table_srat));
> + subtable_len = sub_header->length;
> +
> + while (((unsigned long)sub_header) + subtable_len  < table_end) {
> + /*
> +  * If length is 0, break from this loop to avoid
> +  * infinite loop.
> +  */
> + if (subtable_len == 0) {
> + pr_err("SRAT invalid zero length\n");
> + break;
> + }
> +
> + switch (sub_header->type) {
> + case ACPI_SRAT_TYPE_CPU_AFFINITY:
> + cpu = (struct acpi_srat_cpu_affinity *)sub_header;
> + pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
> + cpu->proximity_domain_lo;
> + if (pxm > max_pxm)
> + max_pxm = pxm;
> + break;
> + case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
> + gpu = (struct acpi_srat_generic_affinity *)sub_header;
> + bdf = *((u16 *)(>device_handle[0])) << 16 |
> + *((u16 *)(>device_handle[2]));
> + if (bdf == pci_id) {
> + found = true;
> + numa_node = pxm_to_node(gpu->proximity_domain);
> + }
> + break;
> + default:
> + break;
> + }
> +
> + if (found)
> + break;
> +
> + sub_header = (struct acpi_subtable_header *)
> + ((unsigned long)sub_header + subtable_len);
> + subtable_len = sub_header->length;
> + }
> +
> + acpi_put_table(table_header);
> +
> + /* Workaround bad cpu-gpu binding case */
> + if (found && (numa_node < 0 ||
> + numa_node > pxm_to_node(max_pxm)))
> + numa_node = 0;
> +
> + if (numa_node != NUMA_NO_NODE)
> + set_dev_node(>pdev->dev, numa_node);
> +}
> +#endif
> +
>  /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
>   * to its NUMA node
>   *   @avail_size: Available size in the memory
> @@ -1804,6 +1890,11 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
> *avail_size,
>   }
>  
>   sub_type_hdr->proximity_domain_from = proximity_domain;
> +
> +#ifdef CONFIG_ACPI_NUMA
> + if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
> + kfd_find_numa_node_in_srat(kdev);
> +#endif
>  #ifdef CONFIG_NUMA
>   if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
>   sub_type_hdr->proximity_domain_to = 0;
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

2021-05-05 Thread Eric Huang



On 2021-05-03 3:34 p.m., Felix Kuehling wrote:

Am 2021-05-03 um 3:27 p.m. schrieb Eric Huang:


On 2021-05-03 3:13 p.m., Felix Kuehling wrote:

Am 2021-05-03 um 10:47 a.m. schrieb Eric Huang:

In NPS4 BIOS we need to find the closest numa node when creating
topology io link between cpu and gpu, if PCI driver doesn't set
it.

Signed-off-by: Eric Huang 
---
   drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 95
++-
   1 file changed, 93 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 38d45711675f..58c6738de774 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1759,6 +1759,91 @@ static int kfd_fill_gpu_memory_affinity(int
*avail_size,
   return 0;
   }
   +#ifdef CONFIG_ACPI_NUMA
+static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
+{
+    struct acpi_table_header *table_header = NULL;
+    struct acpi_subtable_header *sub_header = NULL;
+    unsigned long table_end, subtable_len;
+    u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
+    pci_dev_id(kdev->pdev);
+    u32 bdf;
+    acpi_status status;
+    struct acpi_srat_cpu_affinity *cpu;
+    struct acpi_srat_generic_affinity *gpu;
+    int pxm = 0, max_pxm = 0;
+    int numa_node = NUMA_NO_NODE;
+    bool found = false;
+
+    /* Fetch the SRAT table from ACPI */
+    status = acpi_get_table(ACPI_SIG_SRAT, 0, _header);
+    if (status == AE_NOT_FOUND) {
+    pr_warn("SRAT table not found\n");
+    return;
+    } else if (ACPI_FAILURE(status)) {
+    const char *err = acpi_format_exception(status);
+    pr_err("SRAT table error: %s\n", err);
+    return;
+    }
+
+    table_end = (unsigned long)table_header + table_header->length;
+
+    /* Parse all entries looking for a match. */
+    sub_header = (struct acpi_subtable_header *)
+    ((unsigned long)table_header +
+    sizeof(struct acpi_table_srat));
+    subtable_len = sub_header->length;
+
+    while (((unsigned long)sub_header) + subtable_len  < table_end) {
+    /*
+ * If length is 0, break from this loop to avoid
+ * infinite loop.
+ */
+    if (subtable_len == 0) {
+    pr_err("SRAT invalid zero length\n");
+    break;
+    }
+
+    switch (sub_header->type) {
+    case ACPI_SRAT_TYPE_CPU_AFFINITY:
+    cpu = (struct acpi_srat_cpu_affinity *)sub_header;
+    pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
+    cpu->proximity_domain_lo;
+    if (pxm > max_pxm)
+    max_pxm = pxm;
+    break;
+    case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
+    gpu = (struct acpi_srat_generic_affinity *)sub_header;
+    bdf = *((u16 *)(>device_handle[0])) << 16 |
+    *((u16 *)(>device_handle[2]));
+    if (bdf == pci_id) {
+    found = true;
+    numa_node = pxm_to_node(gpu->proximity_domain);
+    }
+    break;
+    default:
+    break;
+    }
+
+    if (found)
+    break;
+
+    sub_header = (struct acpi_subtable_header *)
+    ((unsigned long)sub_header + subtable_len);
+    subtable_len = sub_header->length;
+    }
+
+    acpi_put_table(table_header);
+
+    /* Workaround bad cpu-gpu binding case */
+    if (found && (numa_node < 0 || numa_node > max_pxm))
+    numa_node = 0;
+
+    if (numa_node != NUMA_NO_NODE)
+    set_dev_node(>pdev->dev, numa_node);
+}
+#endif
+
   /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
    * to its NUMA node
    *    @avail_size: Available size in the memory
@@ -1804,10 +1889,16 @@ static int
kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
   }
     sub_type_hdr->proximity_domain_from = proximity_domain;
-#ifdef CONFIG_NUMA
+
+#ifdef CONFIG_ACPI_NUMA
   if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+    kfd_find_numa_node_in_srat(kdev);
+#endif
+#ifdef CONFIG_NUMA
+    if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
   sub_type_hdr->proximity_domain_to = 0;
-    else
+    set_dev_node(>pdev->dev, 0);

This should not be here. If you really want to lie about the NUMA node
and pretend that it's 0 and not NO_NODE, then that should be done in
kfd_find_numa_node_in_srat. That should be the only function that
changes the dev->numa_node. Like Oak pointed out, eventually that should
maybe not even be part of the driver. But I'm OK with keeping it as a
fallback in the driver for the case that one a GPU doesn't have a NUMA
node assigned by the kernel.

But is setting the dev->numa_node to 0 really necessary? Does anything
else in KFD depend on the dev->numa_node being 0? This function is only
supposed to fill the proximity_domain in the CRAT table. Setting
dev->numa_node is a side effect. If we can't figure out the correct NUMA
node, we shouldn't just guess 0 in a way that potentially 

Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

2021-05-04 Thread Eric Huang

Good point. Thanks.

Eric

On 2021-05-04 10:30 a.m., Lazar, Lijo wrote:


[AMD Official Use Only - Internal Distribution Only]


Converting using pxm_to_node and then comparing against pxm value 
looks a bit odd. Shouldn't the comparsion be between equals - node to 
node or pxm to pxm?


Thanks,
Lijo

*From:* Huang, JinHuiEric 
*Sent:* Tuesday, May 4, 2021 7:30:44 PM
*To:* Lazar, Lijo ; amd-gfx@lists.freedesktop.org 


*Subject:* Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
Like I answer Oak's question,
"For GCD parsing, the relation of GCD to CCD is defined by AMD, generic
parsing in srat.c is considering a GCD as a new numa node which is not
suitable for our need."

GCD's pxm domain will get a wrong numa node which may be bigger than CCD
domains, so I have to do a sanity check to correct it.

Regards,
Eric

On 2021-05-04 3:46 a.m., Lazar, Lijo wrote:
> [AMD Public Use]
>
>> *numa_node > max_pxm
> Why numa node number is compared to a proximity domain? Since you 
are already using pxm_to_node() API, assume that should take care.

>
> That also will avoid parsing ACPI_SRAT_TYPE_CPU_AFFINITY structs.
>
> Thanks,
> Lijo
>
>
> -Original Message-
> From: amd-gfx  On Behalf Of 
Eric Huang

> Sent: Wednesday, April 28, 2021 8:42 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Huang, JinHuiEric 
> Subject: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
>
> In NPS4 BIOS we need to find the closest numa node when creating 
topology io link between cpu and gpu, if PCI driver doesn't set it.

>
> Signed-off-by: Eric Huang 
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++-
>   1 file changed, 91 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c

> index 38d45711675f..57518136c7d7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int 
*avail_size,

>    return 0;
>   }
>
> +#ifdef CONFIG_ACPI
> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
> + int *numa_node)
> +{
> + struct acpi_table_header *table_header = NULL;
> + struct acpi_subtable_header *sub_header = NULL;
> + unsigned long table_end, subtable_len;
> + u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
> + pci_dev_id(kdev->pdev);
> + u32 bdf;
> + acpi_status status;
> + struct acpi_srat_cpu_affinity *cpu;
> + struct acpi_srat_generic_affinity *gpu;
> + int pxm = 0, max_pxm = 0;
> + bool found = false;
> +
> + /* Fetch the SRAT table from ACPI */
> + status = acpi_get_table(ACPI_SIG_SRAT, 0, _header);
> + if (status == AE_NOT_FOUND) {
> + pr_warn("SRAT table not found\n");
> + return;
> + } else if (ACPI_FAILURE(status)) {
> + const char *err = acpi_format_exception(status);
> + pr_err("SRAT table error: %s\n", err);
> + return;
> + }
> +
> + table_end = (unsigned long)table_header + table_header->length;
> +
> + /* Parse all entries looking for a match. */
> +
> + sub_header = (struct acpi_subtable_header *)
> + ((unsigned long)table_header +
> + sizeof(struct acpi_table_srat));
> + subtable_len = sub_header->length;
> +
> + while (((unsigned long)sub_header) + subtable_len  < table_end) {
> + /*
> +  * If length is 0, break from this loop to avoid
> +  * infinite loop.
> +  */
> + if (subtable_len == 0) {
> + pr_err("SRAT invalid zero length\n");
> + break;
> + }
> +
> + switch (sub_header->type) {
> + case ACPI_SRAT_TYPE_CPU_AFFINITY:
> + cpu = (struct acpi_srat_cpu_affinity *)sub_header;
> + pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
> + cpu->proximity_domain_lo;
> + if (pxm > max_pxm)
> + max_pxm = pxm;
> + break;
> + case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
> + gpu = (struct acpi_srat_generic_affinity 
*)sub_header;

> + bdf = *((u16 *)(>device_handle[0])) << 16 |
> + *((u16 
*)(>device_handle[2]));

> + if (bdf == pci_id) {
> + found = true;
> +  

Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

2021-05-04 Thread Lazar, Lijo
[AMD Official Use Only - Internal Distribution Only]

Converting using pxm_to_node and then comparing against pxm value looks a bit 
odd. Shouldn't the comparsion be between equals - node to node or pxm to pxm?

Thanks,
Lijo

From: Huang, JinHuiEric 
Sent: Tuesday, May 4, 2021 7:30:44 PM
To: Lazar, Lijo ; amd-gfx@lists.freedesktop.org 

Subject: Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

Like I answer Oak's question,
"For GCD parsing, the relation of GCD to CCD is defined by AMD, generic
parsing in srat.c is considering a GCD as a new numa node which is not
suitable for our need."

GCD's pxm domain will get a wrong numa node which may be bigger than CCD
domains, so I have to do a sanity check to correct it.

Regards,
Eric

On 2021-05-04 3:46 a.m., Lazar, Lijo wrote:
> [AMD Public Use]
>
>> *numa_node > max_pxm
> Why numa node number is compared to a proximity domain? Since you are already 
> using pxm_to_node() API, assume that should take care.
>
> That also will avoid parsing ACPI_SRAT_TYPE_CPU_AFFINITY structs.
>
> Thanks,
> Lijo
>
>
> -Original Message-
> From: amd-gfx  On Behalf Of Eric Huang
> Sent: Wednesday, April 28, 2021 8:42 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Huang, JinHuiEric 
> Subject: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology
>
> In NPS4 BIOS we need to find the closest numa node when creating topology io 
> link between cpu and gpu, if PCI driver doesn't set it.
>
> Signed-off-by: Eric Huang 
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++-
>   1 file changed, 91 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 38d45711675f..57518136c7d7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int 
> *avail_size,
>return 0;
>   }
>
> +#ifdef CONFIG_ACPI
> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
> + int *numa_node)
> +{
> + struct acpi_table_header *table_header = NULL;
> + struct acpi_subtable_header *sub_header = NULL;
> + unsigned long table_end, subtable_len;
> + u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
> + pci_dev_id(kdev->pdev);
> + u32 bdf;
> + acpi_status status;
> + struct acpi_srat_cpu_affinity *cpu;
> + struct acpi_srat_generic_affinity *gpu;
> + int pxm = 0, max_pxm = 0;
> + bool found = false;
> +
> + /* Fetch the SRAT table from ACPI */
> + status = acpi_get_table(ACPI_SIG_SRAT, 0, _header);
> + if (status == AE_NOT_FOUND) {
> + pr_warn("SRAT table not found\n");
> + return;
> + } else if (ACPI_FAILURE(status)) {
> + const char *err = acpi_format_exception(status);
> + pr_err("SRAT table error: %s\n", err);
> + return;
> + }
> +
> + table_end = (unsigned long)table_header + table_header->length;
> +
> + /* Parse all entries looking for a match. */
> +
> + sub_header = (struct acpi_subtable_header *)
> + ((unsigned long)table_header +
> + sizeof(struct acpi_table_srat));
> + subtable_len = sub_header->length;
> +
> + while (((unsigned long)sub_header) + subtable_len  < table_end) {
> + /*
> +  * If length is 0, break from this loop to avoid
> +  * infinite loop.
> +  */
> + if (subtable_len == 0) {
> + pr_err("SRAT invalid zero length\n");
> + break;
> + }
> +
> + switch (sub_header->type) {
> + case ACPI_SRAT_TYPE_CPU_AFFINITY:
> + cpu = (struct acpi_srat_cpu_affinity *)sub_header;
> + pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
> + cpu->proximity_domain_lo;
> + if (pxm > max_pxm)
> + max_pxm = pxm;
> + break;
> + case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
> + gpu = (struct acpi_srat_generic_affinity *)sub_header;
> + bdf = *((u16 *)(>device_handle[0])) << 16 |
> + *((u16 *)(>device_handle[2]));
> + if (bdf == pci_id) {
> + found = true;
> + *numa_node = pxm_to_node(gpu->proxi

Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

2021-05-04 Thread Eric Huang

Like I answer Oak's question,
"For GCD parsing, the relation of GCD to CCD is defined by AMD, generic 
parsing in srat.c is considering a GCD as a new numa node which is not 
suitable for our need."


GCD's pxm domain will get a wrong numa node which may be bigger than CCD 
domains, so I have to do a sanity check to correct it.


Regards,
Eric

On 2021-05-04 3:46 a.m., Lazar, Lijo wrote:

[AMD Public Use]


*numa_node > max_pxm

Why numa node number is compared to a proximity domain? Since you are already 
using pxm_to_node() API, assume that should take care.

That also will avoid parsing ACPI_SRAT_TYPE_CPU_AFFINITY structs.

Thanks,
Lijo


-Original Message-
From: amd-gfx  On Behalf Of Eric Huang
Sent: Wednesday, April 28, 2021 8:42 PM
To: amd-gfx@lists.freedesktop.org
Cc: Huang, JinHuiEric 
Subject: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

In NPS4 BIOS we need to find the closest numa node when creating topology io 
link between cpu and gpu, if PCI driver doesn't set it.

Signed-off-by: Eric Huang 
---
  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++-
  1 file changed, 91 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 38d45711675f..57518136c7d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
return 0;
  }
  
+#ifdef CONFIG_ACPI

+static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
+   int *numa_node)
+{
+   struct acpi_table_header *table_header = NULL;
+   struct acpi_subtable_header *sub_header = NULL;
+   unsigned long table_end, subtable_len;
+   u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
+   pci_dev_id(kdev->pdev);
+   u32 bdf;
+   acpi_status status;
+   struct acpi_srat_cpu_affinity *cpu;
+   struct acpi_srat_generic_affinity *gpu;
+   int pxm = 0, max_pxm = 0;
+   bool found = false;
+
+   /* Fetch the SRAT table from ACPI */
+   status = acpi_get_table(ACPI_SIG_SRAT, 0, _header);
+   if (status == AE_NOT_FOUND) {
+   pr_warn("SRAT table not found\n");
+   return;
+   } else if (ACPI_FAILURE(status)) {
+   const char *err = acpi_format_exception(status);
+   pr_err("SRAT table error: %s\n", err);
+   return;
+   }
+
+   table_end = (unsigned long)table_header + table_header->length;
+
+   /* Parse all entries looking for a match. */
+
+   sub_header = (struct acpi_subtable_header *)
+   ((unsigned long)table_header +
+   sizeof(struct acpi_table_srat));
+   subtable_len = sub_header->length;
+
+   while (((unsigned long)sub_header) + subtable_len  < table_end) {
+   /*
+* If length is 0, break from this loop to avoid
+* infinite loop.
+*/
+   if (subtable_len == 0) {
+   pr_err("SRAT invalid zero length\n");
+   break;
+   }
+
+   switch (sub_header->type) {
+   case ACPI_SRAT_TYPE_CPU_AFFINITY:
+   cpu = (struct acpi_srat_cpu_affinity *)sub_header;
+   pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
+   cpu->proximity_domain_lo;
+   if (pxm > max_pxm)
+   max_pxm = pxm;
+   break;
+   case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
+   gpu = (struct acpi_srat_generic_affinity *)sub_header;
+   bdf = *((u16 *)(>device_handle[0])) << 16 |
+   *((u16 *)(>device_handle[2]));
+   if (bdf == pci_id) {
+   found = true;
+   *numa_node = pxm_to_node(gpu->proximity_domain);
+   }
+   break;
+   default:
+   break;
+   }
+
+   if (found)
+   break;
+
+   sub_header = (struct acpi_subtable_header *)
+   ((unsigned long)sub_header + subtable_len);
+   subtable_len = sub_header->length;
+   }
+
+   /* workaround bad cpu-gpu binding case */
+   if (found && (*numa_node < 0 || *numa_node > max_pxm))
+   *numa_node = 0;
+}
+#endif
+
  /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
   * to its NUMA node
   *@avail_size: Available size in the memory
@@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
*avail_size,
uint32_t proximity_domain)
  {
struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
+#ifdef CONFIG_NUMA
+ 

RE: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

2021-05-04 Thread Lazar, Lijo
[AMD Public Use]

> *numa_node > max_pxm

Why numa node number is compared to a proximity domain? Since you are already 
using pxm_to_node() API, assume that should take care.

That also will avoid parsing ACPI_SRAT_TYPE_CPU_AFFINITY structs.

Thanks,
Lijo


-Original Message-
From: amd-gfx  On Behalf Of Eric Huang
Sent: Wednesday, April 28, 2021 8:42 PM
To: amd-gfx@lists.freedesktop.org
Cc: Huang, JinHuiEric 
Subject: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

In NPS4 BIOS we need to find the closest numa node when creating topology io 
link between cpu and gpu, if PCI driver doesn't set it.

Signed-off-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++-
 1 file changed, 91 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 38d45711675f..57518136c7d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
return 0;
 }
 
+#ifdef CONFIG_ACPI
+static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
+   int *numa_node)
+{
+   struct acpi_table_header *table_header = NULL;
+   struct acpi_subtable_header *sub_header = NULL;
+   unsigned long table_end, subtable_len;
+   u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
+   pci_dev_id(kdev->pdev);
+   u32 bdf;
+   acpi_status status;
+   struct acpi_srat_cpu_affinity *cpu;
+   struct acpi_srat_generic_affinity *gpu;
+   int pxm = 0, max_pxm = 0;
+   bool found = false;
+
+   /* Fetch the SRAT table from ACPI */
+   status = acpi_get_table(ACPI_SIG_SRAT, 0, _header);
+   if (status == AE_NOT_FOUND) {
+   pr_warn("SRAT table not found\n");
+   return;
+   } else if (ACPI_FAILURE(status)) {
+   const char *err = acpi_format_exception(status);
+   pr_err("SRAT table error: %s\n", err);
+   return;
+   }
+
+   table_end = (unsigned long)table_header + table_header->length;
+
+   /* Parse all entries looking for a match. */
+
+   sub_header = (struct acpi_subtable_header *)
+   ((unsigned long)table_header +
+   sizeof(struct acpi_table_srat));
+   subtable_len = sub_header->length;
+
+   while (((unsigned long)sub_header) + subtable_len  < table_end) {
+   /*
+* If length is 0, break from this loop to avoid
+* infinite loop.
+*/
+   if (subtable_len == 0) {
+   pr_err("SRAT invalid zero length\n");
+   break;
+   }
+
+   switch (sub_header->type) {
+   case ACPI_SRAT_TYPE_CPU_AFFINITY:
+   cpu = (struct acpi_srat_cpu_affinity *)sub_header;
+   pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
+   cpu->proximity_domain_lo;
+   if (pxm > max_pxm)
+   max_pxm = pxm;
+   break;
+   case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
+   gpu = (struct acpi_srat_generic_affinity *)sub_header;
+   bdf = *((u16 *)(>device_handle[0])) << 16 |
+   *((u16 *)(>device_handle[2]));
+   if (bdf == pci_id) {
+   found = true;
+   *numa_node = pxm_to_node(gpu->proximity_domain);
+   }
+   break;
+   default:
+   break;
+   }
+
+   if (found)
+   break;
+
+   sub_header = (struct acpi_subtable_header *)
+   ((unsigned long)sub_header + subtable_len);
+   subtable_len = sub_header->length;
+   }
+
+   /* workaround bad cpu-gpu binding case */
+   if (found && (*numa_node < 0 || *numa_node > max_pxm))
+   *numa_node = 0;
+}
+#endif
+
 /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
  * to its NUMA node
  * @avail_size: Available size in the memory
@@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
*avail_size,
uint32_t proximity_domain)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
+#ifdef CONFIG_NUMA
+   int numa_node = 0;
+#endif
 
*avail_size -= sizeof(struct crat_subtype_iolink);
if (*avail_size < 0)
@@ -1805,9 +1889,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
*avail_size,
 
sub_type_hdr->proximity_domain_from = proximity_domain;  #ifdef 
CONFIG_NUMA
-   if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
-   

Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

2021-05-03 Thread Zeng, Oak
Ok, that makes sense. Thanks for explaining.

Regards,
Oak 

 

On 2021-05-03, 3:13 PM, "Huang, JinHuiEric"  wrote:

In drivers/acpi/numa/srat.c, the generic CCD parsing is for the mapping 
of numa node and pxm domain that creates arrays of pxm_to_node_map and 
node_to_pxm_map. We are currently using API pxm_to_node() to get the 
corresponding information.

For GCD parsing, the relation of GCD to CCD is defined by AMD, generic 
parsing in srat.c is considering a GCD as a new numa node which is not 
suitable for our need.

Regards,
Eric

On 2021-05-03 2:43 p.m., Zeng, Oak wrote:
> I feel such parsing work should be part of the ACPI generic work so 
should be done in drivers/acpi/num/srat.c (see acpi_table_parse_srat) and the 
acpi subsystem should expose APIs for rest drivers to query such numa 
information.
>
> Regards,
> Oak
>
>   
>
> On 2021-04-28, 11:12 AM, "amd-gfx on behalf of Eric Huang" 
 
wrote:
>
>  In NPS4 BIOS we need to find the closest numa node when creating
>  topology io link between cpu and gpu, if PCI driver doesn't set
>  it.
>
>  Signed-off-by: Eric Huang 
>  ---
>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 
++-
>   1 file changed, 91 insertions(+), 3 deletions(-)
>
>  diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>  index 38d45711675f..57518136c7d7 100644
>  --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>  +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>  @@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int 
*avail_size,
>   return 0;
>   }
>
>  +#ifdef CONFIG_ACPI
>  +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
>  +int *numa_node)
>  +{
>  +struct acpi_table_header *table_header = NULL;
>  +struct acpi_subtable_header *sub_header = NULL;
>  +unsigned long table_end, subtable_len;
>  +u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
>  +pci_dev_id(kdev->pdev);
>  +u32 bdf;
>  +acpi_status status;
>  +struct acpi_srat_cpu_affinity *cpu;
>  +struct acpi_srat_generic_affinity *gpu;
>  +int pxm = 0, max_pxm = 0;
>  +bool found = false;
>  +
>  +/* Fetch the SRAT table from ACPI */
>  +status = acpi_get_table(ACPI_SIG_SRAT, 0, _header);
>  +if (status == AE_NOT_FOUND) {
>  +pr_warn("SRAT table not found\n");
>  +return;
>  +} else if (ACPI_FAILURE(status)) {
>  +const char *err = acpi_format_exception(status);
>  +pr_err("SRAT table error: %s\n", err);
>  +return;
>  +}
>  +
>  +table_end = (unsigned long)table_header + table_header->length;
>  +
>  +/* Parse all entries looking for a match. */
>  +
>  +sub_header = (struct acpi_subtable_header *)
>  +((unsigned long)table_header +
>  +sizeof(struct acpi_table_srat));
>  +subtable_len = sub_header->length;
>  +
>  +while (((unsigned long)sub_header) + subtable_len  < table_end) 
{
>  +/*
>  + * If length is 0, break from this loop to avoid
>  + * infinite loop.
>  + */
>  +if (subtable_len == 0) {
>  +pr_err("SRAT invalid zero length\n");
>  +break;
>  +}
>  +
>  +switch (sub_header->type) {
>  +case ACPI_SRAT_TYPE_CPU_AFFINITY:
>  +cpu = (struct acpi_srat_cpu_affinity 
*)sub_header;
>  +pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
>  +cpu->proximity_domain_lo;
>  +if (pxm > max_pxm)
>  +max_pxm = pxm;
>  +break;
>  +case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
>  +gpu = (struct acpi_srat_generic_affinity 
*)sub_header;
>  +bdf = *((u16 *)(>device_handle[0])) << 16 |
>  +*((u16 
*)(>device_handle[2]));
>  +if (bdf == pci_id) {
>  +found = true;
>  +*numa_node = 
pxm_to_node(gpu->proximity_domain);
>  +}
>  +break;
>  +default:
>  +

Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

2021-05-03 Thread Felix Kuehling
Am 2021-05-03 um 3:27 p.m. schrieb Eric Huang:
>
>
> On 2021-05-03 3:13 p.m., Felix Kuehling wrote:
>> Am 2021-05-03 um 10:47 a.m. schrieb Eric Huang:
>>> In NPS4 BIOS we need to find the closest numa node when creating
>>> topology io link between cpu and gpu, if PCI driver doesn't set
>>> it.
>>>
>>> Signed-off-by: Eric Huang 
>>> ---
>>>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 95
>>> ++-
>>>   1 file changed, 93 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>> index 38d45711675f..58c6738de774 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
>>> @@ -1759,6 +1759,91 @@ static int kfd_fill_gpu_memory_affinity(int
>>> *avail_size,
>>>   return 0;
>>>   }
>>>   +#ifdef CONFIG_ACPI_NUMA
>>> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
>>> +{
>>> +    struct acpi_table_header *table_header = NULL;
>>> +    struct acpi_subtable_header *sub_header = NULL;
>>> +    unsigned long table_end, subtable_len;
>>> +    u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
>>> +    pci_dev_id(kdev->pdev);
>>> +    u32 bdf;
>>> +    acpi_status status;
>>> +    struct acpi_srat_cpu_affinity *cpu;
>>> +    struct acpi_srat_generic_affinity *gpu;
>>> +    int pxm = 0, max_pxm = 0;
>>> +    int numa_node = NUMA_NO_NODE;
>>> +    bool found = false;
>>> +
>>> +    /* Fetch the SRAT table from ACPI */
>>> +    status = acpi_get_table(ACPI_SIG_SRAT, 0, _header);
>>> +    if (status == AE_NOT_FOUND) {
>>> +    pr_warn("SRAT table not found\n");
>>> +    return;
>>> +    } else if (ACPI_FAILURE(status)) {
>>> +    const char *err = acpi_format_exception(status);
>>> +    pr_err("SRAT table error: %s\n", err);
>>> +    return;
>>> +    }
>>> +
>>> +    table_end = (unsigned long)table_header + table_header->length;
>>> +
>>> +    /* Parse all entries looking for a match. */
>>> +    sub_header = (struct acpi_subtable_header *)
>>> +    ((unsigned long)table_header +
>>> +    sizeof(struct acpi_table_srat));
>>> +    subtable_len = sub_header->length;
>>> +
>>> +    while (((unsigned long)sub_header) + subtable_len  < table_end) {
>>> +    /*
>>> + * If length is 0, break from this loop to avoid
>>> + * infinite loop.
>>> + */
>>> +    if (subtable_len == 0) {
>>> +    pr_err("SRAT invalid zero length\n");
>>> +    break;
>>> +    }
>>> +
>>> +    switch (sub_header->type) {
>>> +    case ACPI_SRAT_TYPE_CPU_AFFINITY:
>>> +    cpu = (struct acpi_srat_cpu_affinity *)sub_header;
>>> +    pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
>>> +    cpu->proximity_domain_lo;
>>> +    if (pxm > max_pxm)
>>> +    max_pxm = pxm;
>>> +    break;
>>> +    case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
>>> +    gpu = (struct acpi_srat_generic_affinity *)sub_header;
>>> +    bdf = *((u16 *)(>device_handle[0])) << 16 |
>>> +    *((u16 *)(>device_handle[2]));
>>> +    if (bdf == pci_id) {
>>> +    found = true;
>>> +    numa_node = pxm_to_node(gpu->proximity_domain);
>>> +    }
>>> +    break;
>>> +    default:
>>> +    break;
>>> +    }
>>> +
>>> +    if (found)
>>> +    break;
>>> +
>>> +    sub_header = (struct acpi_subtable_header *)
>>> +    ((unsigned long)sub_header + subtable_len);
>>> +    subtable_len = sub_header->length;
>>> +    }
>>> +
>>> +    acpi_put_table(table_header);
>>> +
>>> +    /* Workaround bad cpu-gpu binding case */
>>> +    if (found && (numa_node < 0 || numa_node > max_pxm))
>>> +    numa_node = 0;
>>> +
>>> +    if (numa_node != NUMA_NO_NODE)
>>> +    set_dev_node(>pdev->dev, numa_node);
>>> +}
>>> +#endif
>>> +
>>>   /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
>>>    * to its NUMA node
>>>    *    @avail_size: Available size in the memory
>>> @@ -1804,10 +1889,16 @@ static int
>>> kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
>>>   }
>>>     sub_type_hdr->proximity_domain_from = proximity_domain;
>>> -#ifdef CONFIG_NUMA
>>> +
>>> +#ifdef CONFIG_ACPI_NUMA
>>>   if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
>>> +    kfd_find_numa_node_in_srat(kdev);
>>> +#endif
>>> +#ifdef CONFIG_NUMA
>>> +    if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
>>>   sub_type_hdr->proximity_domain_to = 0;
>>> -    else
>>> +    set_dev_node(>pdev->dev, 0);
>> This should not be here. If you really want to lie about the NUMA node
>> and pretend that it's 0 and not NO_NODE, then that should be done in
>> kfd_find_numa_node_in_srat. That should be the only function that
>> changes the dev->numa_node. Like Oak pointed out, eventually that should
>> maybe not even be part of the driver. But I'm OK with keeping it as 

Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

2021-05-03 Thread Eric Huang



On 2021-05-03 3:13 p.m., Felix Kuehling wrote:

Am 2021-05-03 um 10:47 a.m. schrieb Eric Huang:

In NPS4 BIOS we need to find the closest numa node when creating
topology io link between cpu and gpu, if PCI driver doesn't set
it.

Signed-off-by: Eric Huang 
---
  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 95 ++-
  1 file changed, 93 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 38d45711675f..58c6738de774 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1759,6 +1759,91 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
return 0;
  }
  
+#ifdef CONFIG_ACPI_NUMA

+static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
+{
+   struct acpi_table_header *table_header = NULL;
+   struct acpi_subtable_header *sub_header = NULL;
+   unsigned long table_end, subtable_len;
+   u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
+   pci_dev_id(kdev->pdev);
+   u32 bdf;
+   acpi_status status;
+   struct acpi_srat_cpu_affinity *cpu;
+   struct acpi_srat_generic_affinity *gpu;
+   int pxm = 0, max_pxm = 0;
+   int numa_node = NUMA_NO_NODE;
+   bool found = false;
+
+   /* Fetch the SRAT table from ACPI */
+   status = acpi_get_table(ACPI_SIG_SRAT, 0, _header);
+   if (status == AE_NOT_FOUND) {
+   pr_warn("SRAT table not found\n");
+   return;
+   } else if (ACPI_FAILURE(status)) {
+   const char *err = acpi_format_exception(status);
+   pr_err("SRAT table error: %s\n", err);
+   return;
+   }
+
+   table_end = (unsigned long)table_header + table_header->length;
+
+   /* Parse all entries looking for a match. */
+   sub_header = (struct acpi_subtable_header *)
+   ((unsigned long)table_header +
+   sizeof(struct acpi_table_srat));
+   subtable_len = sub_header->length;
+
+   while (((unsigned long)sub_header) + subtable_len  < table_end) {
+   /*
+* If length is 0, break from this loop to avoid
+* infinite loop.
+*/
+   if (subtable_len == 0) {
+   pr_err("SRAT invalid zero length\n");
+   break;
+   }
+
+   switch (sub_header->type) {
+   case ACPI_SRAT_TYPE_CPU_AFFINITY:
+   cpu = (struct acpi_srat_cpu_affinity *)sub_header;
+   pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
+   cpu->proximity_domain_lo;
+   if (pxm > max_pxm)
+   max_pxm = pxm;
+   break;
+   case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
+   gpu = (struct acpi_srat_generic_affinity *)sub_header;
+   bdf = *((u16 *)(>device_handle[0])) << 16 |
+   *((u16 *)(>device_handle[2]));
+   if (bdf == pci_id) {
+   found = true;
+   numa_node = pxm_to_node(gpu->proximity_domain);
+   }
+   break;
+   default:
+   break;
+   }
+
+   if (found)
+   break;
+
+   sub_header = (struct acpi_subtable_header *)
+   ((unsigned long)sub_header + subtable_len);
+   subtable_len = sub_header->length;
+   }
+
+   acpi_put_table(table_header);
+
+   /* Workaround bad cpu-gpu binding case */
+   if (found && (numa_node < 0 || numa_node > max_pxm))
+   numa_node = 0;
+
+   if (numa_node != NUMA_NO_NODE)
+   set_dev_node(>pdev->dev, numa_node);
+}
+#endif
+
  /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
   * to its NUMA node
   *@avail_size: Available size in the memory
@@ -1804,10 +1889,16 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
*avail_size,
}
  
  	sub_type_hdr->proximity_domain_from = proximity_domain;

-#ifdef CONFIG_NUMA
+
+#ifdef CONFIG_ACPI_NUMA
if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+   kfd_find_numa_node_in_srat(kdev);
+#endif
+#ifdef CONFIG_NUMA
+   if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
sub_type_hdr->proximity_domain_to = 0;
-   else
+   set_dev_node(>pdev->dev, 0);

This should not be here. If you really want to lie about the NUMA node
and pretend that it's 0 and not NO_NODE, then that should be done in
kfd_find_numa_node_in_srat. That should be the only function that
changes the dev->numa_node. Like Oak pointed out, eventually that should
maybe not even be part of the driver. But I'm OK with keeping it as a
fallback in 

Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

2021-05-03 Thread Eric Huang
In drivers/acpi/numa/srat.c, the generic CCD parsing is for the mapping 
of numa node and pxm domain that creates arrays of pxm_to_node_map and 
node_to_pxm_map. We are currently using API pxm_to_node() to get the 
corresponding information.


For GCD parsing, the relation of GCD to CCD is defined by AMD, generic 
parsing in srat.c is considering a GCD as a new numa node which is not 
suitable for our need.


Regards,
Eric

On 2021-05-03 2:43 p.m., Zeng, Oak wrote:

I feel such parsing work should be part of the ACPI generic work so should be 
done in drivers/acpi/num/srat.c (see acpi_table_parse_srat) and the acpi 
subsystem should expose APIs for rest drivers to query such numa information.

Regards,
Oak

  


On 2021-04-28, 11:12 AM, "amd-gfx on behalf of Eric Huang" 
 wrote:

 In NPS4 BIOS we need to find the closest numa node when creating
 topology io link between cpu and gpu, if PCI driver doesn't set
 it.

 Signed-off-by: Eric Huang 
 ---
  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++-
  1 file changed, 91 insertions(+), 3 deletions(-)

 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
 index 38d45711675f..57518136c7d7 100644
 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
 +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
 @@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int 
*avail_size,
return 0;
  }

 +#ifdef CONFIG_ACPI
 +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
 +  int *numa_node)
 +{
 +  struct acpi_table_header *table_header = NULL;
 +  struct acpi_subtable_header *sub_header = NULL;
 +  unsigned long table_end, subtable_len;
 +  u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
 +  pci_dev_id(kdev->pdev);
 +  u32 bdf;
 +  acpi_status status;
 +  struct acpi_srat_cpu_affinity *cpu;
 +  struct acpi_srat_generic_affinity *gpu;
 +  int pxm = 0, max_pxm = 0;
 +  bool found = false;
 +
 +  /* Fetch the SRAT table from ACPI */
 +  status = acpi_get_table(ACPI_SIG_SRAT, 0, _header);
 +  if (status == AE_NOT_FOUND) {
 +  pr_warn("SRAT table not found\n");
 +  return;
 +  } else if (ACPI_FAILURE(status)) {
 +  const char *err = acpi_format_exception(status);
 +  pr_err("SRAT table error: %s\n", err);
 +  return;
 +  }
 +
 +  table_end = (unsigned long)table_header + table_header->length;
 +
 +  /* Parse all entries looking for a match. */
 +
 +  sub_header = (struct acpi_subtable_header *)
 +  ((unsigned long)table_header +
 +  sizeof(struct acpi_table_srat));
 +  subtable_len = sub_header->length;
 +
 +  while (((unsigned long)sub_header) + subtable_len  < table_end) {
 +  /*
 +   * If length is 0, break from this loop to avoid
 +   * infinite loop.
 +   */
 +  if (subtable_len == 0) {
 +  pr_err("SRAT invalid zero length\n");
 +  break;
 +  }
 +
 +  switch (sub_header->type) {
 +  case ACPI_SRAT_TYPE_CPU_AFFINITY:
 +  cpu = (struct acpi_srat_cpu_affinity *)sub_header;
 +  pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
 +  cpu->proximity_domain_lo;
 +  if (pxm > max_pxm)
 +  max_pxm = pxm;
 +  break;
 +  case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
 +  gpu = (struct acpi_srat_generic_affinity *)sub_header;
 +  bdf = *((u16 *)(>device_handle[0])) << 16 |
 +  *((u16 *)(>device_handle[2]));
 +  if (bdf == pci_id) {
 +  found = true;
 +  *numa_node = pxm_to_node(gpu->proximity_domain);
 +  }
 +  break;
 +  default:
 +  break;
 +  }
 +
 +  if (found)
 +  break;
 +
 +  sub_header = (struct acpi_subtable_header *)
 +  ((unsigned long)sub_header + subtable_len);
 +  subtable_len = sub_header->length;
 +  }
 +
 +  /* workaround bad cpu-gpu binding case */
 +  if (found && (*numa_node < 0 || *numa_node > max_pxm))
 +  *numa_node = 0;
 +}
 +#endif
 +
  /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
   * to its NUMA node
   *@avail_size: Available size in the memory
 @@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
*avail_size,
uint32_t proximity_domain)
  {
struct 

Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

2021-05-03 Thread Felix Kuehling
Am 2021-05-03 um 10:47 a.m. schrieb Eric Huang:
> In NPS4 BIOS we need to find the closest numa node when creating
> topology io link between cpu and gpu, if PCI driver doesn't set
> it.
>
> Signed-off-by: Eric Huang 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 95 ++-
>  1 file changed, 93 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 38d45711675f..58c6738de774 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -1759,6 +1759,91 @@ static int kfd_fill_gpu_memory_affinity(int 
> *avail_size,
>   return 0;
>  }
>  
> +#ifdef CONFIG_ACPI_NUMA
> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
> +{
> + struct acpi_table_header *table_header = NULL;
> + struct acpi_subtable_header *sub_header = NULL;
> + unsigned long table_end, subtable_len;
> + u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
> + pci_dev_id(kdev->pdev);
> + u32 bdf;
> + acpi_status status;
> + struct acpi_srat_cpu_affinity *cpu;
> + struct acpi_srat_generic_affinity *gpu;
> + int pxm = 0, max_pxm = 0;
> + int numa_node = NUMA_NO_NODE;
> + bool found = false;
> +
> + /* Fetch the SRAT table from ACPI */
> + status = acpi_get_table(ACPI_SIG_SRAT, 0, _header);
> + if (status == AE_NOT_FOUND) {
> + pr_warn("SRAT table not found\n");
> + return;
> + } else if (ACPI_FAILURE(status)) {
> + const char *err = acpi_format_exception(status);
> + pr_err("SRAT table error: %s\n", err);
> + return;
> + }
> +
> + table_end = (unsigned long)table_header + table_header->length;
> +
> + /* Parse all entries looking for a match. */
> + sub_header = (struct acpi_subtable_header *)
> + ((unsigned long)table_header +
> + sizeof(struct acpi_table_srat));
> + subtable_len = sub_header->length;
> +
> + while (((unsigned long)sub_header) + subtable_len  < table_end) {
> + /*
> +  * If length is 0, break from this loop to avoid
> +  * infinite loop.
> +  */
> + if (subtable_len == 0) {
> + pr_err("SRAT invalid zero length\n");
> + break;
> + }
> +
> + switch (sub_header->type) {
> + case ACPI_SRAT_TYPE_CPU_AFFINITY:
> + cpu = (struct acpi_srat_cpu_affinity *)sub_header;
> + pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
> + cpu->proximity_domain_lo;
> + if (pxm > max_pxm)
> + max_pxm = pxm;
> + break;
> + case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
> + gpu = (struct acpi_srat_generic_affinity *)sub_header;
> + bdf = *((u16 *)(>device_handle[0])) << 16 |
> + *((u16 *)(>device_handle[2]));
> + if (bdf == pci_id) {
> + found = true;
> + numa_node = pxm_to_node(gpu->proximity_domain);
> + }
> + break;
> + default:
> + break;
> + }
> +
> + if (found)
> + break;
> +
> + sub_header = (struct acpi_subtable_header *)
> + ((unsigned long)sub_header + subtable_len);
> + subtable_len = sub_header->length;
> + }
> +
> + acpi_put_table(table_header);
> +
> + /* Workaround bad cpu-gpu binding case */
> + if (found && (numa_node < 0 || numa_node > max_pxm))
> + numa_node = 0;
> +
> + if (numa_node != NUMA_NO_NODE)
> + set_dev_node(>pdev->dev, numa_node);
> +}
> +#endif
> +
>  /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
>   * to its NUMA node
>   *   @avail_size: Available size in the memory
> @@ -1804,10 +1889,16 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
> *avail_size,
>   }
>  
>   sub_type_hdr->proximity_domain_from = proximity_domain;
> -#ifdef CONFIG_NUMA
> +
> +#ifdef CONFIG_ACPI_NUMA
>   if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
> + kfd_find_numa_node_in_srat(kdev);
> +#endif
> +#ifdef CONFIG_NUMA
> + if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
>   sub_type_hdr->proximity_domain_to = 0;
> - else
> + set_dev_node(>pdev->dev, 0);

This should not be here. If you really want to lie about the NUMA node
and pretend that it's 0 and not NO_NODE, then that should be done in
kfd_find_numa_node_in_srat. That should be the only function that
changes the dev->numa_node. Like Oak pointed out, eventually that should
maybe not even be part of the driver. But I'm OK with 

Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

2021-05-03 Thread Zeng, Oak
I feel such parsing work should be part of the ACPI generic work so should be 
done in drivers/acpi/num/srat.c (see acpi_table_parse_srat) and the acpi 
subsystem should expose APIs for rest drivers to query such numa information.

Regards,
Oak 

 

On 2021-04-28, 11:12 AM, "amd-gfx on behalf of Eric Huang" 
 
wrote:

In NPS4 BIOS we need to find the closest numa node when creating
topology io link between cpu and gpu, if PCI driver doesn't set
it.

Signed-off-by: Eric Huang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++-
 1 file changed, 91 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 38d45711675f..57518136c7d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int 
*avail_size,
return 0;
 }

+#ifdef CONFIG_ACPI
+static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
+   int *numa_node)
+{
+   struct acpi_table_header *table_header = NULL;
+   struct acpi_subtable_header *sub_header = NULL;
+   unsigned long table_end, subtable_len;
+   u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
+   pci_dev_id(kdev->pdev);
+   u32 bdf;
+   acpi_status status;
+   struct acpi_srat_cpu_affinity *cpu;
+   struct acpi_srat_generic_affinity *gpu;
+   int pxm = 0, max_pxm = 0;
+   bool found = false;
+
+   /* Fetch the SRAT table from ACPI */
+   status = acpi_get_table(ACPI_SIG_SRAT, 0, _header);
+   if (status == AE_NOT_FOUND) {
+   pr_warn("SRAT table not found\n");
+   return;
+   } else if (ACPI_FAILURE(status)) {
+   const char *err = acpi_format_exception(status);
+   pr_err("SRAT table error: %s\n", err);
+   return;
+   }
+
+   table_end = (unsigned long)table_header + table_header->length;
+
+   /* Parse all entries looking for a match. */
+
+   sub_header = (struct acpi_subtable_header *)
+   ((unsigned long)table_header +
+   sizeof(struct acpi_table_srat));
+   subtable_len = sub_header->length;
+
+   while (((unsigned long)sub_header) + subtable_len  < table_end) {
+   /*
+* If length is 0, break from this loop to avoid
+* infinite loop.
+*/
+   if (subtable_len == 0) {
+   pr_err("SRAT invalid zero length\n");
+   break;
+   }
+
+   switch (sub_header->type) {
+   case ACPI_SRAT_TYPE_CPU_AFFINITY:
+   cpu = (struct acpi_srat_cpu_affinity *)sub_header;
+   pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
+   cpu->proximity_domain_lo;
+   if (pxm > max_pxm)
+   max_pxm = pxm;
+   break;
+   case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
+   gpu = (struct acpi_srat_generic_affinity *)sub_header;
+   bdf = *((u16 *)(>device_handle[0])) << 16 |
+   *((u16 *)(>device_handle[2]));
+   if (bdf == pci_id) {
+   found = true;
+   *numa_node = pxm_to_node(gpu->proximity_domain);
+   }
+   break;
+   default:
+   break;
+   }
+
+   if (found)
+   break;
+
+   sub_header = (struct acpi_subtable_header *)
+   ((unsigned long)sub_header + subtable_len);
+   subtable_len = sub_header->length;
+   }
+
+   /* workaround bad cpu-gpu binding case */
+   if (found && (*numa_node < 0 || *numa_node > max_pxm))
+   *numa_node = 0;
+}
+#endif
+
 /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
  * to its NUMA node
  * @avail_size: Available size in the memory
@@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
*avail_size,
uint32_t proximity_domain)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
+#ifdef CONFIG_NUMA
+   int numa_node = 0;
+#endif

*avail_size -= sizeof(struct crat_subtype_iolink);
if (*avail_size < 0)
@@ -1805,9 +1889,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
*avail_size,

sub_type_hdr->proximity_domain_from = proximity_domain;
 #ifdef CONFIG_NUMA
-   if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
-   sub_type_hdr->proximity_domain_to = 0;
-   else
+ 

Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

2021-05-03 Thread Eric Huang

Thanks Felix for your review. I will send another patch.

Eric

On 2021-04-30 7:42 p.m., Felix Kuehling wrote:

Am 2021-04-28 um 11:11 a.m. schrieb Eric Huang:

In NPS4 BIOS we need to find the closest numa node when creating
topology io link between cpu and gpu, if PCI driver doesn't set
it.

Signed-off-by: Eric Huang 
---
  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++-
  1 file changed, 91 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 38d45711675f..57518136c7d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
return 0;
  }
  
+#ifdef CONFIG_ACPI

+static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
+   int *numa_node)
+{
+   struct acpi_table_header *table_header = NULL;
+   struct acpi_subtable_header *sub_header = NULL;
+   unsigned long table_end, subtable_len;
+   u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
+   pci_dev_id(kdev->pdev);
+   u32 bdf;
+   acpi_status status;
+   struct acpi_srat_cpu_affinity *cpu;
+   struct acpi_srat_generic_affinity *gpu;
+   int pxm = 0, max_pxm = 0;
+   bool found = false;
+
+   /* Fetch the SRAT table from ACPI */
+   status = acpi_get_table(ACPI_SIG_SRAT, 0, _header);
+   if (status == AE_NOT_FOUND) {
+   pr_warn("SRAT table not found\n");
+   return;
+   } else if (ACPI_FAILURE(status)) {
+   const char *err = acpi_format_exception(status);
+   pr_err("SRAT table error: %s\n", err);
+   return;
+   }

After a successful call to acpi_get_table you need to call
acpi_put_table before this function returns to avoid leaking memory.



+
+   table_end = (unsigned long)table_header + table_header->length;
+
+   /* Parse all entries looking for a match. */
+
+   sub_header = (struct acpi_subtable_header *)
+   ((unsigned long)table_header +
+   sizeof(struct acpi_table_srat));
+   subtable_len = sub_header->length;
+
+   while (((unsigned long)sub_header) + subtable_len  < table_end) {
+   /*
+* If length is 0, break from this loop to avoid
+* infinite loop.
+*/
+   if (subtable_len == 0) {
+   pr_err("SRAT invalid zero length\n");
+   break;
+   }
+
+   switch (sub_header->type) {
+   case ACPI_SRAT_TYPE_CPU_AFFINITY:
+   cpu = (struct acpi_srat_cpu_affinity *)sub_header;
+   pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
+   cpu->proximity_domain_lo;
+   if (pxm > max_pxm)
+   max_pxm = pxm;
+   break;
+   case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
+   gpu = (struct acpi_srat_generic_affinity *)sub_header;
+   bdf = *((u16 *)(>device_handle[0])) << 16 |
+   *((u16 *)(>device_handle[2]));
+   if (bdf == pci_id) {
+   found = true;
+   *numa_node = pxm_to_node(gpu->proximity_domain);
+   }
+   break;
+   default:
+   break;
+   }
+
+   if (found)
+   break;
+
+   sub_header = (struct acpi_subtable_header *)
+   ((unsigned long)sub_header + subtable_len);
+   subtable_len = sub_header->length;
+   }
+
+   /* workaround bad cpu-gpu binding case */
+   if (found && (*numa_node < 0 || *numa_node > max_pxm))
+   *numa_node = 0;

A suggestion: If you find a sensible NUMA node, call set_dev_node here.
That simplifies the caller. See below



+}
+#endif
+
  /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
   * to its NUMA node
   *@avail_size: Available size in the memory
@@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
*avail_size,
uint32_t proximity_domain)
  {
struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
+#ifdef CONFIG_NUMA
+   int numa_node = 0;

Should this be NUMA_NO_NODE?



+#endif
  
  	*avail_size -= sizeof(struct crat_subtype_iolink);

if (*avail_size < 0)
@@ -1805,9 +1889,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
*avail_size,
  
  	sub_type_hdr->proximity_domain_from = proximity_domain;

  #ifdef CONFIG_NUMA
-   if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
-   sub_type_hdr->proximity_domain_to = 0;
-   else
+   if 

Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

2021-04-30 Thread Felix Kuehling
Am 2021-04-28 um 11:11 a.m. schrieb Eric Huang:
> In NPS4 BIOS we need to find the closest numa node when creating
> topology io link between cpu and gpu, if PCI driver doesn't set
> it.
>
> Signed-off-by: Eric Huang 
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++-
>  1 file changed, 91 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 38d45711675f..57518136c7d7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int 
> *avail_size,
>   return 0;
>  }
>  
> +#ifdef CONFIG_ACPI
> +static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
> + int *numa_node)
> +{
> + struct acpi_table_header *table_header = NULL;
> + struct acpi_subtable_header *sub_header = NULL;
> + unsigned long table_end, subtable_len;
> + u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
> + pci_dev_id(kdev->pdev);
> + u32 bdf;
> + acpi_status status;
> + struct acpi_srat_cpu_affinity *cpu;
> + struct acpi_srat_generic_affinity *gpu;
> + int pxm = 0, max_pxm = 0;
> + bool found = false;
> +
> + /* Fetch the SRAT table from ACPI */
> + status = acpi_get_table(ACPI_SIG_SRAT, 0, _header);
> + if (status == AE_NOT_FOUND) {
> + pr_warn("SRAT table not found\n");
> + return;
> + } else if (ACPI_FAILURE(status)) {
> + const char *err = acpi_format_exception(status);
> + pr_err("SRAT table error: %s\n", err);
> + return;
> + }
After a successful call to acpi_get_table you need to call
acpi_put_table before this function returns to avoid leaking memory.


> +
> + table_end = (unsigned long)table_header + table_header->length;
> +
> + /* Parse all entries looking for a match. */
> +
> + sub_header = (struct acpi_subtable_header *)
> + ((unsigned long)table_header +
> + sizeof(struct acpi_table_srat));
> + subtable_len = sub_header->length;
> +
> + while (((unsigned long)sub_header) + subtable_len  < table_end) {
> + /*
> +  * If length is 0, break from this loop to avoid
> +  * infinite loop.
> +  */
> + if (subtable_len == 0) {
> + pr_err("SRAT invalid zero length\n");
> + break;
> + }
> +
> + switch (sub_header->type) {
> + case ACPI_SRAT_TYPE_CPU_AFFINITY:
> + cpu = (struct acpi_srat_cpu_affinity *)sub_header;
> + pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
> + cpu->proximity_domain_lo;
> + if (pxm > max_pxm)
> + max_pxm = pxm;
> + break;
> + case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
> + gpu = (struct acpi_srat_generic_affinity *)sub_header;
> + bdf = *((u16 *)(>device_handle[0])) << 16 |
> + *((u16 *)(>device_handle[2]));
> + if (bdf == pci_id) {
> + found = true;
> + *numa_node = pxm_to_node(gpu->proximity_domain);
> + }
> + break;
> + default:
> + break;
> + }
> +
> + if (found)
> + break;
> +
> + sub_header = (struct acpi_subtable_header *)
> + ((unsigned long)sub_header + subtable_len);
> + subtable_len = sub_header->length;
> + }
> +
> + /* workaround bad cpu-gpu binding case */
> + if (found && (*numa_node < 0 || *numa_node > max_pxm))
> + *numa_node = 0;

A suggestion: If you find a sensible NUMA node, call set_dev_node here.
That simplifies the caller. See below


> +}
> +#endif
> +
>  /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
>   * to its NUMA node
>   *   @avail_size: Available size in the memory
> @@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
> *avail_size,
>   uint32_t proximity_domain)
>  {
>   struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
> +#ifdef CONFIG_NUMA
> + int numa_node = 0;

Should this be NUMA_NO_NODE?


> +#endif
>  
>   *avail_size -= sizeof(struct crat_subtype_iolink);
>   if (*avail_size < 0)
> @@ -1805,9 +1889,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
> *avail_size,
>  
>   sub_type_hdr->proximity_domain_from = proximity_domain;
>  #ifdef CONFIG_NUMA
> - if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
> - sub_type_hdr->proximity_domain_to = 0;
> - else
> + if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {

Re: [PATCH] drm/amdkfd: add ACPI SRAT parsing for topology

2021-04-30 Thread Eric Huang

ping...

On 2021-04-28 11:11 a.m., Eric Huang wrote:

In NPS4 BIOS we need to find the closest numa node when creating
topology io link between cpu and gpu, if PCI driver doesn't set
it.

Signed-off-by: Eric Huang 
---
  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 94 ++-
  1 file changed, 91 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 38d45711675f..57518136c7d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1759,6 +1759,87 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
return 0;
  }
  
+#ifdef CONFIG_ACPI

+static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev,
+   int *numa_node)
+{
+   struct acpi_table_header *table_header = NULL;
+   struct acpi_subtable_header *sub_header = NULL;
+   unsigned long table_end, subtable_len;
+   u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
+   pci_dev_id(kdev->pdev);
+   u32 bdf;
+   acpi_status status;
+   struct acpi_srat_cpu_affinity *cpu;
+   struct acpi_srat_generic_affinity *gpu;
+   int pxm = 0, max_pxm = 0;
+   bool found = false;
+
+   /* Fetch the SRAT table from ACPI */
+   status = acpi_get_table(ACPI_SIG_SRAT, 0, _header);
+   if (status == AE_NOT_FOUND) {
+   pr_warn("SRAT table not found\n");
+   return;
+   } else if (ACPI_FAILURE(status)) {
+   const char *err = acpi_format_exception(status);
+   pr_err("SRAT table error: %s\n", err);
+   return;
+   }
+
+   table_end = (unsigned long)table_header + table_header->length;
+
+   /* Parse all entries looking for a match. */
+
+   sub_header = (struct acpi_subtable_header *)
+   ((unsigned long)table_header +
+   sizeof(struct acpi_table_srat));
+   subtable_len = sub_header->length;
+
+   while (((unsigned long)sub_header) + subtable_len  < table_end) {
+   /*
+* If length is 0, break from this loop to avoid
+* infinite loop.
+*/
+   if (subtable_len == 0) {
+   pr_err("SRAT invalid zero length\n");
+   break;
+   }
+
+   switch (sub_header->type) {
+   case ACPI_SRAT_TYPE_CPU_AFFINITY:
+   cpu = (struct acpi_srat_cpu_affinity *)sub_header;
+   pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
+   cpu->proximity_domain_lo;
+   if (pxm > max_pxm)
+   max_pxm = pxm;
+   break;
+   case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
+   gpu = (struct acpi_srat_generic_affinity *)sub_header;
+   bdf = *((u16 *)(>device_handle[0])) << 16 |
+   *((u16 *)(>device_handle[2]));
+   if (bdf == pci_id) {
+   found = true;
+   *numa_node = pxm_to_node(gpu->proximity_domain);
+   }
+   break;
+   default:
+   break;
+   }
+
+   if (found)
+   break;
+
+   sub_header = (struct acpi_subtable_header *)
+   ((unsigned long)sub_header + subtable_len);
+   subtable_len = sub_header->length;
+   }
+
+   /* workaround bad cpu-gpu binding case */
+   if (found && (*numa_node < 0 || *numa_node > max_pxm))
+   *numa_node = 0;
+}
+#endif
+
  /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU
   * to its NUMA node
   *@avail_size: Available size in the memory
@@ -1774,6 +1855,9 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
*avail_size,
uint32_t proximity_domain)
  {
struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd;
+#ifdef CONFIG_NUMA
+   int numa_node = 0;
+#endif
  
  	*avail_size -= sizeof(struct crat_subtype_iolink);

if (*avail_size < 0)
@@ -1805,9 +1889,13 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int 
*avail_size,
  
  	sub_type_hdr->proximity_domain_from = proximity_domain;

  #ifdef CONFIG_NUMA
-   if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
-   sub_type_hdr->proximity_domain_to = 0;
-   else
+   if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) {
+#ifdef CONFIG_ACPI
+   kfd_find_numa_node_in_srat(kdev, _node);
+#endif
+   sub_type_hdr->proximity_domain_to = numa_node;
+   set_dev_node(>pdev->dev, numa_node);
+   } else
sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
  #else
sub_type_hdr->proximity_domain_to = 0;