amdgpu: Add peer-to-peer support among PCIe connected AMD GPUs

Felix Kuehling Fri, 03 Jun 2022 15:26:52 -0700


On 2022-06-03 06:52, Ramesh Errabolu wrote:

Add support for peer-to-peer communication among AMD GPUs over PCIe
bus. Support REQUIRES enablement of config HSA_AMD_P2P.

Signed-off-by: Ramesh Errabolu <ramesh.errab...@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   1 +
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 312 ++++++++++++++----
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  33 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |   8 +
  4 files changed, 294 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index f8b9f27adcf5..5c00ea1df21c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -48,6 +48,7 @@ enum kfd_mem_attachment_type {
        KFD_MEM_ATT_SHARED,     /* Share kgd_mem->bo or another attachment's */
        KFD_MEM_ATT_USERPTR,    /* SG bo to DMA map pages from a userptr bo */
        KFD_MEM_ATT_DMABUF,     /* DMAbuf to DMA map TTM BOs */
+       KFD_MEM_ATT_SG          /* Tag to DMA map SG BOs */
  };

struct kfd_mem_attachment {

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 054e4a76ae2e..8e37eae26e49 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -241,6 +241,42 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
        kfree(bo->kfd_bo);
  }

+/**

+ * @create_dmamap_sg_bo: Creates a amdgpu_bo object to reflect information
+ * about USERPTR or DOOREBELL or MMIO BO.
+ * @adev: Device for which dmamap BO is being created
+ * @mem: BO of peer device that is being DMA mapped. Provides parameters
+ *      in building the dmamap BO
+ * @bo_out: Output parameter updated with handle of dmamap BO
+ */
+static int
+create_dmamap_sg_bo(struct amdgpu_device *adev,
+                struct kgd_mem *mem, struct amdgpu_bo **bo_out)
+{
+       struct drm_gem_object *gem_obj;
+       int ret, align;
+
+       ret = amdgpu_bo_reserve(mem->bo, false);
+       if (ret)
+               return ret;
+
+       align = 1;
+       ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align,
+                       AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE,
+                       ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj);
+
+       amdgpu_bo_unreserve(mem->bo);
+
+       if (ret) {
+               pr_err("Error in creating DMA mappable SG BO on domain: %d\n", 
ret);
+               return -EINVAL;
+       }
+
+       *bo_out = gem_to_amdgpu_bo(gem_obj);
+       (*bo_out)->parent = amdgpu_bo_ref(mem->bo);
+       return ret;
+}
+
  /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
   *  reservation object.
   *
@@ -481,6 +517,38 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, 
struct kgd_mem *mem)
        return pte_flags;
  }

+/**

+ * create_sg_table() - Create an sg_table for a contiguous DMA addr range
+ * @addr: The starting address to point to
+ * @size: Size of memory area in bytes being pointed to
+ *
+ * Allocates an instance of sg_table and initializes it to point to memory
+ * area specified by input parameters. The address used to build is assumed
+ * to be DMA mapped, if needed.
+ *
+ * DOORBELL or MMIO BOs use only one scatterlist node in their sg_table
+ * because they are physically contiguous.
+ *
+ * Return: Initialized instance of SG Table or NULL
+ */
+static struct sg_table *create_sg_table(uint64_t addr, uint32_t size)
+{
+       struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+
+       if (!sg)
+               return NULL;
+       if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
+               kfree(sg);
+               return NULL;
+       }
+       sg_dma_address(sg->sgl) = addr;
+       sg->sgl->length = size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+       sg->sgl->dma_length = size;
+#endif
+       return sg;
+}
+
  static int
  kfd_mem_dmamap_userptr(struct kgd_mem *mem,
                       struct kfd_mem_attachment *attachment)
@@ -545,6 +613,87 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment 
*attachment)
        return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
  }

+/**

+ * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or 
MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * An access request from the device that owns DOORBELL does not require DMA 
mapping.
+ * This is because the request doesn't go through PCIe root complex i.e. it 
instead
+ * loops back. The need to DMA map arises only when accessing peer device's 
DOORBELL
+ *
+ * In contrast, all access requests for MMIO need to be DMA mapped without 
regard to
+ * device ownership. This is because access requests for MMIO go through PCIe 
root
+ * complex.
+ *
+ * This is accomplished in two steps:
+ *   - Obtain DMA mapped address of DOORBELL or MMIO memory that could be used
+ *         in updating requesting device's page table
+ *   - Signal TTM to mark memory pointed to by requesting device's BO as GPU
+ *         accessible. This allows an update of requesting device's page table
+ *         with entries associated with DOOREBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ *   - Mapping of DOORBELL or MMIO BO of same or peer device
+ *   - Validating an evicted DOOREBELL or MMIO BO on device seeking access
+ *
+ * Return: ZERO if successful, NON-ZERO otherwise
+ */
+static int
+kfd_mem_dmamap_sg_bo(struct kgd_mem *mem,
+                    struct kfd_mem_attachment *attachment)
+{
+       struct ttm_operation_ctx ctx = {.interruptible = true};
+       struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+       struct amdgpu_device *adev = attachment->adev;
+       struct ttm_tt *ttm = bo->tbo.ttm;
+       enum dma_data_direction dir;
+       dma_addr_t dma_addr;
+       bool mmio;
+       int ret;
+
+       /* Expect SG Table of dmapmap BO to be NULL */
+       mmio = (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP);
+       if (unlikely(ttm->sg)) {
+               pr_err("SG Table of %d BO for peer device is UNEXPECTEDLY 
NON-NULL", mmio);
+               return -EINVAL;
+       }
+
+       dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+                       DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+       dma_addr = mem->bo->tbo.sg->sgl->dma_address;
+       pr_debug("%d BO size: %d\n", mmio, mem->bo->tbo.sg->sgl->length);
+       pr_debug("%d BO address before DMA mapping: %llx\n", mmio, dma_addr);
+       dma_addr = dma_map_resource(adev->dev, dma_addr,
+                       mem->bo->tbo.sg->sgl->length, dir, 
DMA_ATTR_SKIP_CPU_SYNC);
+       ret = dma_mapping_error(adev->dev, dma_addr);
+       if (unlikely(ret))
+               return ret;
+       pr_debug("%d BO address after DMA mapping: %llx\n", mmio, dma_addr);
+
+       ttm->sg = create_sg_table(dma_addr, mem->bo->tbo.sg->sgl->length);
+       if (unlikely(!ttm->sg)) {
+               ret = -ENOMEM;
+               goto unmap_sg;
+       }
+
+       amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+       ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+       if (unlikely(ret))
+               goto free_sg;
+
+       return ret;
+
+free_sg:
+       sg_free_table(ttm->sg);
+       kfree(ttm->sg);
+       ttm->sg = NULL;
+unmap_sg:
+       dma_unmap_resource(adev->dev, dma_addr, mem->bo->tbo.sg->sgl->length,
+                          dir, DMA_ATTR_SKIP_CPU_SYNC);
+       return ret;
+}
+
  static int
  kfd_mem_dmamap_attachment(struct kgd_mem *mem,
                          struct kfd_mem_attachment *attachment)
@@ -556,6 +705,8 @@ kfd_mem_dmamap_attachment(struct kgd_mem *mem,
                return kfd_mem_dmamap_userptr(mem, attachment);
        case KFD_MEM_ATT_DMABUF:
                return kfd_mem_dmamap_dmabuf(attachment);
+       case KFD_MEM_ATT_SG:
+               return kfd_mem_dmamap_sg_bo(mem, attachment);
        default:
                WARN_ON_ONCE(1);
        }
@@ -596,6 +747,50 @@ kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment 
*attachment)
        ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
  }

+/**

+ * kfd_mem_dmaunmap_sg_bo() - Free DMA mapped sg_table of DOORBELL or MMIO BO
+ * @mem: SG BO of the DOORBELL or MMIO resource on the owning device
+ * @attachment: Virtual address attachment of the BO on accessing device
+ *
+ * The method performs following steps:
+ *   - Signal TTM to mark memory pointed to by BO as GPU inaccessible
+ *   - Free SG Table that is used to encapsulate DMA mapped memory of
+ *          peer device's DOORBELL or MMIO memory
+ *
+ * This method is invoked in the following contexts:
+ *     UNMapping of DOORBELL or MMIO BO on a device having access to its memory
+ *     Eviction of DOOREBELL or MMIO BO on device having access to its memory
+ *
+ * Return: void
+ */
+static void
+kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem,
+                      struct kfd_mem_attachment *attachment)
+{
+       struct ttm_operation_ctx ctx = {.interruptible = true};
+       struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+       struct amdgpu_device *adev = attachment->adev;
+       struct ttm_tt *ttm = bo->tbo.ttm;
+       enum dma_data_direction dir;
+
+       if (unlikely(!ttm->sg)) {
+               pr_err("SG Table of BO is UNEXPECTEDLY NULL");
+               return;
+       }
+
+       amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+       ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+
+       dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+                               DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+       dma_unmap_resource(adev->dev, ttm->sg->sgl->dma_address,
+                       ttm->sg->sgl->length, dir, DMA_ATTR_SKIP_CPU_SYNC);
+       sg_free_table(ttm->sg);
+       kfree(ttm->sg);
+       ttm->sg = NULL;
+       bo->tbo.sg = NULL;
+}
+
  static void
  kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
                            struct kfd_mem_attachment *attachment)
@@ -609,38 +804,14 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
        case KFD_MEM_ATT_DMABUF:
                kfd_mem_dmaunmap_dmabuf(attachment);
                break;
+       case KFD_MEM_ATT_SG:
+               kfd_mem_dmaunmap_sg_bo(mem, attachment);
+               break;
        default:
                WARN_ON_ONCE(1);
        }
  }

-static int

-kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem,
-                      struct amdgpu_bo **bo)
-{
-       unsigned long bo_size = mem->bo->tbo.base.size;
-       struct drm_gem_object *gobj;
-       int ret;
-
-       ret = amdgpu_bo_reserve(mem->bo, false);
-       if (ret)
-               return ret;
-
-       ret = amdgpu_gem_object_create(adev, bo_size, 1,
-                                      AMDGPU_GEM_DOMAIN_CPU,
-                                      AMDGPU_GEM_CREATE_PREEMPTIBLE,
-                                      ttm_bo_type_sg, mem->bo->tbo.base.resv,
-                                      &gobj);
-       amdgpu_bo_unreserve(mem->bo);
-       if (ret)
-               return ret;
-
-       *bo = gem_to_amdgpu_bo(gobj);
-       (*bo)->parent = amdgpu_bo_ref(mem->bo);
-
-       return 0;
-}
-
  static int
  kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
                      struct amdgpu_bo **bo)
@@ -691,6 +862,8 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
struct kgd_mem *mem,
        uint64_t va = mem->va;
        struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
        struct amdgpu_bo *bo[2] = {NULL, NULL};
+       bool same_hive = false;
+       bool accessible = false;
        int i, ret;

if (!va) {

@@ -698,6 +871,31 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
struct kgd_mem *mem,
                return -EINVAL;
        }

+ /* Determine access to VRAM, MMIO and DOORBELL BOs of peer devices

+        *
+        * The access path of MMIO and DOORBELL BOs of is always over PCIe.
+        * In contrast the access path of VRAM BOs depens upon the type of
+        * link that connects the peer device. Access over PCIe is allowed
+        * if peer device has large BAR. In contrast, access over xGMI is
+        * allowed for both small and large BAR configurations of peer device
+        */
+
+       if ((adev != bo_adev) &&
+           ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||

The parentheses around == and != are unnecessary.

+            (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
+            (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
+               accessible = amdgpu_device_is_peer_accessible(bo_adev, adev);

This can still call amdgpu_device_is_peer_accessible unnecessarily on XGMI-connected GPUs.


This whole block could be simplified like this:

        if (adev != bo_adev &&
            (mem->domain == AMDGPU_GEM_DOMAIN_VRAM ||
             (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
             (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
                if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM)
                        same_hive = amdgpu_xgmi_same_hive(adev, bo_adev);
                if (!same_hive && !amdgpu_device_is_peer_accessible(bo_adev, 
adev))
                        return -EINVAL;
        }

The short-circuit of the && operator makes sure amdgpu_device_is_peer_accessible is only called if necessary. And you don't need the bool accessible variable any more.

+               if ((mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
+                   (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
+                       if (!accessible)
+                               return -EINVAL;
+               if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM) {
+                       same_hive = amdgpu_xgmi_same_hive(adev, bo_adev);
+                       if (!same_hive && !accessible)
+                               return -EINVAL;
+               }
+       }
+
        for (i = 0; i <= is_aql; i++) {
                attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
                if (unlikely(!attachment[i])) {
@@ -708,9 +906,9 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
struct kgd_mem *mem,
                pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
                         va + bo_size, vm);

- if (adev == bo_adev ||

-                  (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && 
adev->ram_is_direct_mapped) ||
-                  (mem->domain == AMDGPU_GEM_DOMAIN_VRAM && 
amdgpu_xgmi_same_hive(adev, bo_adev))) {
+               if ((adev == bo_adev && !(mem->alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
+                   (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && 
adev->ram_is_direct_mapped) ||
+                   same_hive) {
                        /* Mappings on the local GPU, or VRAM mappings in the
                         * local hive, or userptr mapping IOMMU direct map mode
                         * share the original BO
@@ -726,26 +924,38 @@ static int kfd_mem_attach(struct amdgpu_device *adev, 
struct kgd_mem *mem,
                } else if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
                        /* Create an SG BO to DMA-map userptrs on other GPUs */
                        attachment[i]->type = KFD_MEM_ATT_USERPTR;
-                       ret = kfd_mem_attach_userptr(adev, mem, &bo[i]);
+                       ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
                        if (ret)
                                goto unwind;
                } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT &&
                           mem->bo->tbo.type != ttm_bo_type_sg) {
-                       /* GTT BOs use DMA-mapping ability of dynamic-attach
-                        * DMA bufs. TODO: The same should work for VRAM on
-                        * large-BAR GPUs.
-                        */
+                       /* GTT BOs use DMA-mapping ability of dynamic-attach 
DMA bufs */
+                       attachment[i]->type = KFD_MEM_ATT_DMABUF;
+                       ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
+                       if (ret)
+                               goto unwind;
+               /* Enable acces to VRAM BOs of peer devices */
+#if defined(CONFIG_HSA_AMD_P2P)
+               } else if (mem->domain == AMDGPU_GEM_DOMAIN_VRAM &&
+                          mem->bo->tbo.type == ttm_bo_type_device) {
                        attachment[i]->type = KFD_MEM_ATT_DMABUF;
                        ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);

Now this looks identical to the GTT case. Since the CONFIG_HSA_AMD_P2P check is already done in amdgpu_device_is_peer_accessible, you can probably just merge the two if-cases into one now:


        ...
                } else if ((mem->domain == AMDGPU_GEM_DOMAIN_GTT &&
                            mem->bo->tbo.type != ttm_bo_type_sg) ||
                           mem->domain == AMDGPU_GEM_DOMAIN_VRAM) {
                        /* GTT and VRAM BOs use DMA-mapping ability of 
dynamic-attach
                         * DMA bufs.
                         */
                        attachment[i]->type = KFD_MEM_ATT_DMABUF;
                        ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
                        if (ret)
                                goto unwind;
        ...

If you move this after the doorbell and MMIO case, you can even drop the mem->bo->tbo.type != ttm_bo_type_sg check.

                        if (ret)
                                goto unwind;
+                       pr_debug("Employ DMABUF mechanim to enable peer GPU 
access\n");
+#endif
+               /* Handle DOORBELL BOs of peer devices and MMIO BOs of local 
and peer devices */
+               } else if ((mem->bo->tbo.type == ttm_bo_type_sg) &&
+                          ((mem->alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
+                           (mem->alloc_flags & 
KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {

I think userptr, doorbell and MMIO BOs are the only ones that use SGs. So you don't really need to check the mem->alloc_flags here, except maybe as a WARN_ONCE sanity check.

+                       attachment[i]->type = KFD_MEM_ATT_SG;
+                       ret = create_dmamap_sg_bo(adev, mem, &bo[i]);
+                       if (ret)
+                               goto unwind;
                } else {
-                       /* FIXME: Need to DMA-map other BO types:
-                        * large-BAR VRAM, doorbells, MMIO remap
-                        */
-                       attachment[i]->type = KFD_MEM_ATT_SHARED;
-                       bo[i] = mem->bo;
-                       drm_gem_object_get(&bo[i]->tbo.base);
+                       WARN_ONCE(true, "Handling invalid ATTACH request");
+                       ret = -EINVAL;
+                       goto unwind;
                }

/* Add BO to VM internal data structures */

@@ -1146,24 +1356,6 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
        return ret;
  }

-static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)

-{
-       struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
-
-       if (!sg)
-               return NULL;
-       if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
-               kfree(sg);
-               return NULL;
-       }
-       sg->sgl->dma_address = addr;
-       sg->sgl->length = size;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
-       sg->sgl->dma_length = size;
-#endif
-       return sg;
-}
-
  static int process_validate_vms(struct amdkfd_process_info *process_info)
  {
        struct amdgpu_vm *peer_vm;
@@ -1532,7 +1724,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                        bo_type = ttm_bo_type_sg;
                        if (size > UINT_MAX)
                                return -EINVAL;
-                       sg = create_doorbell_sg(*offset, size);
+                       sg = create_sg_table(*offset, size);
                        if (!sg)
                                return -ENOMEM;
                } else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b5ee0eb984ee..acb9e934adc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -32,6 +32,7 @@
  #include <linux/slab.h>
  #include <linux/iommu.h>
  #include <linux/pci.h>
+#include <linux/pci-p2pdma.h>

#include <drm/drm_atomic_helper.h>

  #include <drm/drm_probe_helper.h>
@@ -127,6 +128,8 @@ const char *amdgpu_asic_name[] = {
        "LAST",
  };

+extern bool pcie_p2p;

+
  /**
   * DOC: pcie_replay_count
   *
@@ -5434,6 +5437,36 @@ static void amdgpu_device_get_pcie_info(struct 
amdgpu_device *adev)
        }
  }

+/**

+ * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
+ *
+ * @adev: amdgpu_device pointer
+ * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
+ *
+ * Return true if @peer_adev can access (DMA) @adev through the PCIe
+ * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
+ * @peer_adev.
+ */
+bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
+                                     struct amdgpu_device *peer_adev)
+{
+#ifdef CONFIG_HSA_AMD_P2P
+       bool p2p_access = false;
+       uint64_t address_mask = peer_adev->dev->dma_mask ?
+               ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
+       resource_size_t aper_limit =
+               adev->gmc.aper_base + adev->gmc.aper_size - 1;
+       p2p_access = !(pci_p2pdma_distance_many(adev->pdev, &peer_adev->dev, 1, 
true) < 0);


This would give you a checkpatch warning. Please run checkpatch.

Why can't you initialize p2p_access in the declaration above?

Regards,
  Felix

+
+       return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
+               adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
+               !(adev->gmc.aper_base & address_mask ||
+                 aper_limit & address_mask));
+#else
+       return false;
+#endif
+}
+
  int amdgpu_device_baco_enter(struct drm_device *dev)
  {
        struct amdgpu_device *adev = drm_to_adev(dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index bed4ed88951f..d1c82a9e8569 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -802,6 +802,14 @@ MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue 
eviction on VM fault (
  module_param_named(no_queue_eviction_on_vm_fault, 
amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
  #endif

+/**

+ * DOC: pcie_p2p (bool)
+ * Enable PCIe P2P (requires large-BAR). Default value: true (on)
+ */
+bool pcie_p2p = true;
+module_param(pcie_p2p, bool, 0444);
+MODULE_PARM_DESC(pcie_p2p, "Enable PCIe P2P (requires large-BAR). (N = off, Y = 
on(default))");
+
  /**
   * DOC: dcfeaturemask (uint)
   * Override display features enabled. See enum DC_FEATURE_MASK in 
drivers/gpu/drm/amd/include/amd_shared.h.

Re: [PATCH 2/3] drm/amdgpu: Add peer-to-peer support among PCIe connected AMD GPUs

Reply via email to