Some RAM device regions created with memory_region_init_ram_device_ptr()
are not intended to be P2P DMA targets.

The VFIO listener currently treats all RAM device regions as DMA-capable
and attempts to map them into the IOMMU. For regions without dma-buf
backing, this fails and prints warnings such as:

  IOMMU_IOAS_MAP failed: Bad address, PCI BAR?

Introduce a MemoryRegion flag (ram_device_skip_iommu_map) to mark RAM
device regions that should not be IOMMU mapped, paired with
memory_region_skip_iommu_map() / memory_region_set_skip_iommu_map()
accessors. When the flag is set, the VFIO listener skips DMA mapping
for that region.

Reviewed-by: Eric Auger <[email protected]>
Tested-by: Eric Auger <[email protected]>
Signed-off-by: Shameer Kolothum <[email protected]>
---
 include/system/memory.h | 21 +++++++++++++++++++++
 hw/vfio/listener.c      |  6 ++++++
 system/memory.c         | 10 ++++++++++
 hw/vfio/trace-events    |  1 +
 4 files changed, 38 insertions(+)

diff --git a/include/system/memory.h b/include/system/memory.h
index 1417132f6d..4560809013 100644
--- a/include/system/memory.h
+++ b/include/system/memory.h
@@ -864,6 +864,8 @@ struct MemoryRegion {
 
     /* For devices designed to perform re-entrant IO into their own IO MRs */
     bool disable_reentrancy_guard;
+    /* RAM device region that does not require IOMMU mapping for P2P */
+    bool ram_device_skip_iommu_map;
 };
 
 struct IOMMUMemoryRegion {
@@ -1743,6 +1745,25 @@ static inline bool memory_region_is_romd(const MemoryRegion *mr)
  */
 bool memory_region_is_protected(const MemoryRegion *mr);
 
+/**
+ * memory_region_skip_iommu_map: check whether a memory region is excluded
+ *                               from IOMMU mapping
+ *
+ * Returns %true if @mr is a RAM device region marked to skip IOMMU mapping.
+ *
+ * @mr: the memory region being queried
+ */
+bool memory_region_skip_iommu_map(const MemoryRegion *mr);
+
+/**
+ * memory_region_set_skip_iommu_map: mark a RAM device region to skip IOMMU
+ *                                   mapping
+ *
+ * @mr: the memory region being modified
+ * @skip: %true to skip IOMMU mapping, %false to allow it
+ */
+void memory_region_set_skip_iommu_map(MemoryRegion *mr, bool skip);
+
 /**
  * memory_region_has_guest_memfd: check whether a memory region has guest_memfd
  *     associated
diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
index 0b72a2cf5e..14cca678ae 100644
--- a/hw/vfio/listener.c
+++ b/hw/vfio/listener.c
@@ -610,6 +610,12 @@ void vfio_container_region_add(VFIOContainer *bcontainer,
         }
     }
 
+    if (memory_region_skip_iommu_map(section->mr)) {
+        trace_vfio_listener_region_skip_dma_map(memory_region_name(section->mr),
+                                                iova, int128_get64(llsize));
+        return;
+    }
+
     ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize),
                                  vaddr, section->readonly, section->mr);
     if (ret) {
diff --git a/system/memory.c b/system/memory.c
index 739ba11da6..48245fd01b 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -1814,6 +1814,16 @@ bool memory_region_is_protected(const MemoryRegion *mr)
     return mr->ram && (mr->ram_block->flags & RAM_PROTECTED);
 }
 
+bool memory_region_skip_iommu_map(const MemoryRegion *mr)
+{
+    return memory_region_is_ram_device(mr) && mr->ram_device_skip_iommu_map;
+}
+
+void memory_region_set_skip_iommu_map(MemoryRegion *mr, bool skip)
+{
+    mr->ram_device_skip_iommu_map = skip;
+}
+
 bool memory_region_has_guest_memfd(const MemoryRegion *mr)
 {
     return mr->ram_block && mr->ram_block->guest_memfd >= 0;
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 2049159015..70c5aa1bcc 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -100,6 +100,7 @@ vfio_listener_region_del_iommu(const char *name) "region_del [iommu] %s"
 vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]"
 vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR
 vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA"
+vfio_listener_region_skip_dma_map(const char *name, uint64_t iova, uint64_t size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" marked to skip IOMMU mapping"
 vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
 vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t min, uint64_t max) "section 0x%"PRIx64" - 0x%"PRIx64" -> update [0x%"PRIx64" - 0x%"PRIx64"]"
 vfio_device_dirty_tracking_start(int nr_ranges, uint64_t min32, uint64_t max32, uint64_t min64, uint64_t max64, uint64_t minpci, uint64_t maxpci) "nr_ranges %d 32:[0x%"PRIx64" - 0x%"PRIx64"], 64:[0x%"PRIx64" - 0x%"PRIx64"], pci64:[0x%"PRIx64" - 0x%"PRIx64"]"
-- 
2.43.0


Reply via email to