[Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2017-11-14 Thread prasad . singamsetty
From: Prasad Singamsetty 

The current implementation of the Intel IOMMU code only supports a 39-bit
iova address width. This patch provides a new parameter (x-aw-bits)
for intel-iommu to extend its address width to 48 bits while keeping the
default the same (39 bits). The reason for not changing the default
is to avoid potential compatibility problems with live migration of
intel-iommu enabled QEMU guest. The only valid values for 'x-aw-bits'
parameter are 39 and 48.

After enabling the larger address width (48), we should be able to map
larger iova addresses in the guest, for example in a QEMU guest that
is configured with large memory ( >=1TB ). To check whether the 48-bit
aw is enabled, we can grep the guest dmesg output for the line:
"DMAR: Host address width 48".

Signed-off-by: Prasad Singamsetty 
---
 hw/i386/acpi-build.c   |   3 +-
 hw/i386/intel_iommu.c  | 101 -
 hw/i386/intel_iommu_internal.h |   9 ++--
 include/hw/i386/intel_iommu.h  |   1 +
 4 files changed, 65 insertions(+), 49 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 73519ab3ac..537957c89a 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2460,6 +2460,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
 AcpiDmarDeviceScope *scope = NULL;
 /* Root complex IOAPIC use one path[0] only */
 size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]);
+IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu);
 
 assert(iommu);
 if (iommu->intr_supported) {
@@ -2467,7 +2468,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
 }
 
 dmar = acpi_data_push(table_data, sizeof(*dmar));
-dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1;
+dmar->host_address_width = intel_iommu->aw_bits - 1;
 dmar->flags = dmar_flags;
 
 /* DMAR Remapping Hardware Unit Definition structure */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 53b3bf244d..c2380fdfdc 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -521,9 +521,9 @@ static inline dma_addr_t 
vtd_ce_get_slpt_base(VTDContextEntry *ce)
 return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
 }
 
-static inline uint64_t vtd_get_slpte_addr(uint64_t slpte)
+static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw)
 {
-return slpte & VTD_SL_PT_BASE_ADDR_MASK(VTD_HOST_ADDRESS_WIDTH);
+return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw);
 }
 
 /* Whether the pte indicates the address of the page frame */
@@ -608,20 +608,21 @@ static inline bool vtd_ce_type_check(X86IOMMUState 
*x86_iommu,
 return true;
 }
 
-static inline uint64_t vtd_iova_limit(VTDContextEntry *ce)
+static inline uint64_t vtd_iova_limit(VTDContextEntry *ce, uint8_t aw)
 {
 uint32_t ce_agaw = vtd_ce_get_agaw(ce);
-return 1ULL << MIN(ce_agaw, VTD_MGAW);
+return 1ULL << MIN(ce_agaw, aw);
 }
 
 /* Return true if IOVA passes range check, otherwise false. */
-static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce)
+static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce,
+uint8_t aw)
 {
 /*
  * Check if @iova is above 2^X-1, where X is the minimum of MGAW
  * in CAP_REG and AW in context-entry.
  */
-return !(iova & ~(vtd_iova_limit(ce) - 1));
+return !(iova & ~(vtd_iova_limit(ce, aw) - 1));
 }
 
 /*
@@ -669,7 +670,7 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, 
uint8_t bus_num)
  */
 static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
  uint64_t *slptep, uint32_t *slpte_level,
- bool *reads, bool *writes)
+ bool *reads, bool *writes, uint8_t aw_bits)
 {
 dma_addr_t addr = vtd_ce_get_slpt_base(ce);
 uint32_t level = vtd_ce_get_level(ce);
@@ -677,7 +678,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t 
iova, bool is_write,
 uint64_t slpte;
 uint64_t access_right_check;
 
-if (!vtd_iova_range_check(iova, ce)) {
+if (!vtd_iova_range_check(iova, ce, aw_bits)) {
 trace_vtd_err_dmar_iova_overflow(iova);
 return -VTD_FR_ADDR_BEYOND_MGAW;
 }
@@ -714,7 +715,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t 
iova, bool is_write,
 *slpte_level = level;
 return 0;
 }
-addr = vtd_get_slpte_addr(slpte);
+addr = vtd_get_slpte_addr(slpte, aw_bits);
 level--;
 }
 }
@@ -732,11 +733,12 @@ typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, 
void *private);
  * @read: whether parent level has read permission
  * @write: whether parent level has write permission
  * @notify_unmap: whether we should notify invalid entries
+ * @aw: maximum address width
  */
 static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
uint64_t end, vtd_page_walk_hook hook_fn,
-   

Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2017-11-28 Thread Michael S. Tsirkin
On Tue, Nov 14, 2017 at 06:13:50PM -0500, prasad.singamse...@oracle.com wrote:
> From: Prasad Singamsetty 
> 
> The current implementation of Intel IOMMU code only supports 39 bits
> iova address width. This patch provides a new parameter (x-aw-bits)
> for intel-iommu to extend its address width to 48 bits but keeping the
> default the same (39 bits). The reason for not changing the default
> is to avoid potential compatibility problems

You can change the default, just make it 39 for existing machine types.

> with live migration of
> intel-iommu enabled QEMU guest. The only valid values for 'x-aw-bits'
> parameter are 39 and 48.

I'd rather make it a boolean then.

> 
> After enabling larger address width (48), we should be able to map
> larger iova addresses in the guest. For example, a QEMU guest that
> is configured with large memory ( >=1TB ). To check whether 48 bits
> aw is enabled, we can grep in the guest dmesg output with line:
> "DMAR: Host address width 48".
> 
> Signed-off-by: Prasad Singamsetty 
> ---
>  hw/i386/acpi-build.c   |   3 +-
>  hw/i386/intel_iommu.c  | 101 
> -
>  hw/i386/intel_iommu_internal.h |   9 ++--
>  include/hw/i386/intel_iommu.h  |   1 +
>  4 files changed, 65 insertions(+), 49 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 73519ab3ac..537957c89a 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -2460,6 +2460,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
>  AcpiDmarDeviceScope *scope = NULL;
>  /* Root complex IOAPIC use one path[0] only */
>  size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]);
> +IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu);
>  
>  assert(iommu);
>  if (iommu->intr_supported) {
> @@ -2467,7 +2468,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
>  }
>  
>  dmar = acpi_data_push(table_data, sizeof(*dmar));
> -dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1;
> +dmar->host_address_width = intel_iommu->aw_bits - 1;
>  dmar->flags = dmar_flags;
>  
>  /* DMAR Remapping Hardware Unit Definition structure */
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 53b3bf244d..c2380fdfdc 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -521,9 +521,9 @@ static inline dma_addr_t 
> vtd_ce_get_slpt_base(VTDContextEntry *ce)
>  return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
>  }
>  
> -static inline uint64_t vtd_get_slpte_addr(uint64_t slpte)
> +static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw)
>  {
> -return slpte & VTD_SL_PT_BASE_ADDR_MASK(VTD_HOST_ADDRESS_WIDTH);
> +return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw);
>  }
>  
>  /* Whether the pte indicates the address of the page frame */
> @@ -608,20 +608,21 @@ static inline bool vtd_ce_type_check(X86IOMMUState 
> *x86_iommu,
>  return true;
>  }
>  
> -static inline uint64_t vtd_iova_limit(VTDContextEntry *ce)
> +static inline uint64_t vtd_iova_limit(VTDContextEntry *ce, uint8_t aw)
>  {
>  uint32_t ce_agaw = vtd_ce_get_agaw(ce);
> -return 1ULL << MIN(ce_agaw, VTD_MGAW);
> +return 1ULL << MIN(ce_agaw, aw);
>  }
>  
>  /* Return true if IOVA passes range check, otherwise false. */
> -static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce)
> +static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce,
> +uint8_t aw)
>  {
>  /*
>   * Check if @iova is above 2^X-1, where X is the minimum of MGAW
>   * in CAP_REG and AW in context-entry.
>   */
> -return !(iova & ~(vtd_iova_limit(ce) - 1));
> +return !(iova & ~(vtd_iova_limit(ce, aw) - 1));
>  }
>  
>  /*
> @@ -669,7 +670,7 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState 
> *s, uint8_t bus_num)
>   */
>  static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool 
> is_write,
>   uint64_t *slptep, uint32_t *slpte_level,
> - bool *reads, bool *writes)
> + bool *reads, bool *writes, uint8_t aw_bits)
>  {
>  dma_addr_t addr = vtd_ce_get_slpt_base(ce);
>  uint32_t level = vtd_ce_get_level(ce);
> @@ -677,7 +678,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, 
> uint64_t iova, bool is_write,
>  uint64_t slpte;
>  uint64_t access_right_check;
>  
> -if (!vtd_iova_range_check(iova, ce)) {
> +if (!vtd_iova_range_check(iova, ce, aw_bits)) {
>  trace_vtd_err_dmar_iova_overflow(iova);
>  return -VTD_FR_ADDR_BEYOND_MGAW;
>  }
> @@ -714,7 +715,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, 
> uint64_t iova, bool is_write,
>  *slpte_level = level;
>  return 0;
>  }
> -addr = vtd_get_slpte_addr(slpte);
> +addr = vtd_get_slpte_addr(slpte, aw_bits);
>  level--;
>  }
>  }
> @@ -73

Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2017-11-29 Thread Prasad Singamsetty

Thanks Michael. Some comments below.

On 11/28/2017 9:32 AM, Michael S. Tsirkin wrote:

On Tue, Nov 14, 2017 at 06:13:50PM -0500, prasad.singamse...@oracle.com wrote:

From: Prasad Singamsetty 

The current implementation of Intel IOMMU code only supports 39 bits
iova address width. This patch provides a new parameter (x-aw-bits)
for intel-iommu to extend its address width to 48 bits but keeping the
default the same (39 bits). The reason for not changing the default
is to avoid potential compatibility problems


You can change the default, just make it 39 for existing machine types.


I think introducing a new machine type is not appropriate as this
is an implementation limitation for the existing machine type.
Currently q35 is the only machine type that supports intel-iommu.
And we want to retain the current default behavior for q35 to avoid
any new issues with live migration.




with live migration of
intel-iommu enabled QEMU guest. The only valid values for 'x-aw-bits'
parameter are 39 and 48.


I'd rather make it a boolean then.


Right. It seems Intel already has additional sizes supported so keeping
it as an integer seems better.

Thanks.
--Prasad





After enabling larger address width (48), we should be able to map
larger iova addresses in the guest. For example, a QEMU guest that
is configured with large memory ( >=1TB ). To check whether 48 bits
aw is enabled, we can grep in the guest dmesg output with line:
"DMAR: Host address width 48".

Signed-off-by: Prasad Singamsetty 
---
  hw/i386/acpi-build.c   |   3 +-
  hw/i386/intel_iommu.c  | 101 -
  hw/i386/intel_iommu_internal.h |   9 ++--
  include/hw/i386/intel_iommu.h  |   1 +
  4 files changed, 65 insertions(+), 49 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 73519ab3ac..537957c89a 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2460,6 +2460,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
  AcpiDmarDeviceScope *scope = NULL;
  /* Root complex IOAPIC use one path[0] only */
  size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]);
+IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu);
  
  assert(iommu);

  if (iommu->intr_supported) {
@@ -2467,7 +2468,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
  }
  
  dmar = acpi_data_push(table_data, sizeof(*dmar));

-dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1;
+dmar->host_address_width = intel_iommu->aw_bits - 1;
  dmar->flags = dmar_flags;
  
  /* DMAR Remapping Hardware Unit Definition structure */

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 53b3bf244d..c2380fdfdc 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -521,9 +521,9 @@ static inline dma_addr_t 
vtd_ce_get_slpt_base(VTDContextEntry *ce)
  return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
  }
  
-static inline uint64_t vtd_get_slpte_addr(uint64_t slpte)

+static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw)
  {
-return slpte & VTD_SL_PT_BASE_ADDR_MASK(VTD_HOST_ADDRESS_WIDTH);
+return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw);
  }
  
  /* Whether the pte indicates the address of the page frame */

@@ -608,20 +608,21 @@ static inline bool vtd_ce_type_check(X86IOMMUState 
*x86_iommu,
  return true;
  }
  
-static inline uint64_t vtd_iova_limit(VTDContextEntry *ce)

+static inline uint64_t vtd_iova_limit(VTDContextEntry *ce, uint8_t aw)
  {
  uint32_t ce_agaw = vtd_ce_get_agaw(ce);
-return 1ULL << MIN(ce_agaw, VTD_MGAW);
+return 1ULL << MIN(ce_agaw, aw);
  }
  
  /* Return true if IOVA passes range check, otherwise false. */

-static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce)
+static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce,
+uint8_t aw)
  {
  /*
   * Check if @iova is above 2^X-1, where X is the minimum of MGAW
   * in CAP_REG and AW in context-entry.
   */
-return !(iova & ~(vtd_iova_limit(ce) - 1));
+return !(iova & ~(vtd_iova_limit(ce, aw) - 1));
  }
  
  /*

@@ -669,7 +670,7 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, 
uint8_t bus_num)
   */
  static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool 
is_write,
   uint64_t *slptep, uint32_t *slpte_level,
- bool *reads, bool *writes)
+ bool *reads, bool *writes, uint8_t aw_bits)
  {
  dma_addr_t addr = vtd_ce_get_slpt_base(ce);
  uint32_t level = vtd_ce_get_level(ce);
@@ -677,7 +678,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t 
iova, bool is_write,
  uint64_t slpte;
  uint64_t access_right_check;
  
-if (!vtd_iova_range_check(iova, ce)) {

+if (!vtd_iova_range_check(iova, ce, aw_bits)) {
  trace_vtd_err_dmar_iova_overflow(iova);
  retu

Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2017-11-29 Thread Peter Xu
On Wed, Nov 29, 2017 at 01:05:22PM -0800, Prasad Singamsetty wrote:
> Thanks Michael. Some comments below.
> 
> On 11/28/2017 9:32 AM, Michael S. Tsirkin wrote:
> > On Tue, Nov 14, 2017 at 06:13:50PM -0500, prasad.singamse...@oracle.com 
> > wrote:
> > > From: Prasad Singamsetty 
> > > 
> > > The current implementation of Intel IOMMU code only supports 39 bits
> > > iova address width. This patch provides a new parameter (x-aw-bits)
> > > for intel-iommu to extend its address width to 48 bits but keeping the
> > > default the same (39 bits). The reason for not changing the default
> > > is to avoid potential compatibility problems
> > 
> > You can change the default, just make it 39 for existing machine types.
> 
> I think introducing a new machine type is not appropriate as this
> is an implementation limitation for the existing machine type.
> Currently q35 is the only machine type that supports intel-iommu.
> And we want to retain the current default behavior for q35 to avoid
> any new issues with live migration.

I guess "existing machine type" means e.g. pc-q35-2.11 and older ones,
rather than creating another machine type in parallel with q35.  So we
can set 48 bits as the default on upcoming pc-q35-2.12 machines, while
keeping 39 bits on the old ones.

Please refer to include/hw/compat.h.

> 
> > 
> > > with live migration of
> > > intel-iommu enabled QEMU guest. The only valid values for 'x-aw-bits'
> > > parameter are 39 and 48.
> > 
> > I'd rather make it a boolean then.
> 
> Right. It seems Intel already has additional sizes supported so keeping
> it as an integer seems better.

Yes, considering that 5-level IOMMUs are coming (AFAIK).

-- 
Peter Xu



Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2017-11-29 Thread Liu, Yi L
On Tue, Nov 14, 2017 at 06:13:50PM -0500, prasad.singamse...@oracle.com wrote:
> From: Prasad Singamsetty 
> 
> The current implementation of Intel IOMMU code only supports 39 bits
> iova address width. This patch provides a new parameter (x-aw-bits)
> for intel-iommu to extend its address width to 48 bits but keeping the
> default the same (39 bits). The reason for not changing the default
> is to avoid potential compatibility problems with live migration of
> intel-iommu enabled QEMU guest. The only valid values for 'x-aw-bits'
> parameter are 39 and 48.
> 
> After enabling larger address width (48), we should be able to map
> larger iova addresses in the guest. For example, a QEMU guest that
> is configured with large memory ( >=1TB ). To check whether 48 bits

I didn't quite get your point here. Address width limits the iova range,
but it doesn't limit the guest memory range. E.g. you can use a 39-bit iova
address to access a guest physical address larger than (2^39 - 1) as long
as the guest 2nd level page table is programmed properly. The one exception
is if you require a contiguous iova range (e.g. 2^39); that would be an issue.
Not sure if this is the major motivation of your patch? However, I'm not
against extending the address width to 48 bits. It just needs to be made
clear here.

Regards,
Yi L

> aw is enabled, we can grep in the guest dmesg output with line:
> "DMAR: Host address width 48".
> 
> Signed-off-by: Prasad Singamsetty 
> ---
>  hw/i386/acpi-build.c   |   3 +-
>  hw/i386/intel_iommu.c  | 101 
> -
>  hw/i386/intel_iommu_internal.h |   9 ++--
>  include/hw/i386/intel_iommu.h  |   1 +
>  4 files changed, 65 insertions(+), 49 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 73519ab3ac..537957c89a 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -2460,6 +2460,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
>  AcpiDmarDeviceScope *scope = NULL;
>  /* Root complex IOAPIC use one path[0] only */
>  size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]);
> +IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu);
>  
>  assert(iommu);
>  if (iommu->intr_supported) {
> @@ -2467,7 +2468,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
>  }
>  
>  dmar = acpi_data_push(table_data, sizeof(*dmar));
> -dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1;
> +dmar->host_address_width = intel_iommu->aw_bits - 1;
>  dmar->flags = dmar_flags;
>  
>  /* DMAR Remapping Hardware Unit Definition structure */
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 53b3bf244d..c2380fdfdc 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -521,9 +521,9 @@ static inline dma_addr_t 
> vtd_ce_get_slpt_base(VTDContextEntry *ce)
>  return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
>  }
>  
> -static inline uint64_t vtd_get_slpte_addr(uint64_t slpte)
> +static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw)
>  {
> -return slpte & VTD_SL_PT_BASE_ADDR_MASK(VTD_HOST_ADDRESS_WIDTH);
> +return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw);
>  }
>  
>  /* Whether the pte indicates the address of the page frame */
> @@ -608,20 +608,21 @@ static inline bool vtd_ce_type_check(X86IOMMUState 
> *x86_iommu,
>  return true;
>  }
>  
> -static inline uint64_t vtd_iova_limit(VTDContextEntry *ce)
> +static inline uint64_t vtd_iova_limit(VTDContextEntry *ce, uint8_t aw)
>  {
>  uint32_t ce_agaw = vtd_ce_get_agaw(ce);
> -return 1ULL << MIN(ce_agaw, VTD_MGAW);
> +return 1ULL << MIN(ce_agaw, aw);
>  }
>  
>  /* Return true if IOVA passes range check, otherwise false. */
> -static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce)
> +static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce,
> +uint8_t aw)
>  {
>  /*
>   * Check if @iova is above 2^X-1, where X is the minimum of MGAW
>   * in CAP_REG and AW in context-entry.
>   */
> -return !(iova & ~(vtd_iova_limit(ce) - 1));
> +return !(iova & ~(vtd_iova_limit(ce, aw) - 1));
>  }
>  
>  /*
> @@ -669,7 +670,7 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState 
> *s, uint8_t bus_num)
>   */
>  static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool 
> is_write,
>   uint64_t *slptep, uint32_t *slpte_level,
> - bool *reads, bool *writes)
> + bool *reads, bool *writes, uint8_t aw_bits)
>  {
>  dma_addr_t addr = vtd_ce_get_slpt_base(ce);
>  uint32_t level = vtd_ce_get_level(ce);
> @@ -677,7 +678,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, 
> uint64_t iova, bool is_write,
>  uint64_t slpte;
>  uint64_t access_right_check;
>  
> -if (!vtd_iova_range_check(iova, ce)) {
> +if (!vtd_iova_range_check(iova, ce, aw_bits)) 

Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2017-11-30 Thread Peter Xu
On Thu, Nov 30, 2017 at 01:22:38PM +0800, Liu, Yi L wrote:
> On Tue, Nov 14, 2017 at 06:13:50PM -0500, prasad.singamse...@oracle.com wrote:
> > From: Prasad Singamsetty 
> > 
> > The current implementation of Intel IOMMU code only supports 39 bits
> > iova address width. This patch provides a new parameter (x-aw-bits)
> > for intel-iommu to extend its address width to 48 bits but keeping the
> > default the same (39 bits). The reason for not changing the default
> > is to avoid potential compatibility problems with live migration of
> > intel-iommu enabled QEMU guest. The only valid values for 'x-aw-bits'
> > parameter are 39 and 48.
> > 
> > After enabling larger address width (48), we should be able to map
> > larger iova addresses in the guest. For example, a QEMU guest that
> > is configured with large memory ( >=1TB ). To check whether 48 bits
> 
> I didn't quite get your point here. Address width limits the iova range,
> but it doesn't limit the guest memory range. e.g. you can use 39 bit iova
> address to access a guest physical address larger than (2^39 - 1) as long
> as the guest 2nd level page table is well programmed. Only one exception,
> if you requires a continuous iova range(e.g. 2^39), it would be an issue.
> Not sure if this is the major motivation of your patch? However, I'm not
> against extend the address width to be 48 bits. Just need to make it clear
> here.

One thing I can think of is the identity mapping. Say, when iommu=pt
is set in the guest while the PT capability is not supported by the
hardware (here I mean the emulated hardware, of course), the guest kernel
will create one identity mapping to emulate the PT mode.

The current Linux kernel's identity mapping should be a static 48-bit
mapping (it must cover the whole memory range of the guest), so if we
provide a 39-bit vIOMMU to the guest, AFAIU we'll fail when attaching
to that identity domain every single device that is protected by that
39-bit vIOMMU (the kernel will find that the domain gaw is bigger than
the gaw supported by the vIOMMU of that device).

I see no good fix for that, except boosting the supported gaw to a
bigger one.

Thanks,

-- 
Peter Xu



Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2017-11-30 Thread Liu, Yi L
On Thu, Nov 30, 2017 at 05:11:55PM +0800, Peter Xu wrote:
> On Thu, Nov 30, 2017 at 01:22:38PM +0800, Liu, Yi L wrote:
> > On Tue, Nov 14, 2017 at 06:13:50PM -0500, prasad.singamse...@oracle.com 
> > wrote:
> > > From: Prasad Singamsetty 
> > > 
> > > The current implementation of Intel IOMMU code only supports 39 bits
> > > iova address width. This patch provides a new parameter (x-aw-bits)
> > > for intel-iommu to extend its address width to 48 bits but keeping the
> > > default the same (39 bits). The reason for not changing the default
> > > is to avoid potential compatibility problems with live migration of
> > > intel-iommu enabled QEMU guest. The only valid values for 'x-aw-bits'
> > > parameter are 39 and 48.
> > > 
> > > After enabling larger address width (48), we should be able to map
> > > larger iova addresses in the guest. For example, a QEMU guest that
> > > is configured with large memory ( >=1TB ). To check whether 48 bits
> > 
> > I didn't quite get your point here. Address width limits the iova range,
> > but it doesn't limit the guest memory range. e.g. you can use 39 bit iova
> > address to access a guest physical address larger than (2^39 - 1) as long
> > as the guest 2nd level page table is well programmed. Only one exception,
> > if you requires a continuous iova range(e.g. 2^39), it would be an issue.
> > Not sure if this is the major motivation of your patch? However, I'm not
> > against extend the address width to be 48 bits. Just need to make it clear
> > here.
> 
> One thing I can think of is the identity mapping. Say, when iommu=pt
> is set in guest, meanwhile when PT capability is not supported from
> hardware (here I mean, the emulated hardware, of course), guest kernel
> will create one identity mapping to emulate the PT mode.

True.
 
> Current linux kernel's identity mapping should be a static 48 bits
> mapping (it must cover the whole memory range of guest), so if we

I suppose the guest memory range depends on the AW reported by CPUID? Not
sure if it is always 48 bits.

> provide a 39 bits vIOMMU to the guest, AFAIU we'll fail at device
> attaching to that identity domain of every single device that is
> protected by that 39 bits vIOMMU (kernel will find that domain gaw is
> bigger than vIOMMU supported gaw of that device).

Yeah, this is a good argument. As it is a 1:1 mapping, the translated
address is limited all the same.

> I do see no good fix for that, except boost the supported gaw to
> bigger ones.

How about exposing the cap.PT bit by default? In that case, there will be
no 1:1 mapping on the guest side. Translation is skipped, so the IOMMU AW
won't limit the addressing.

Regards,
Yi L

> 
> Thanks,
> 
> -- 
> Peter Xu
> 



Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2017-11-30 Thread Peter Xu
On Thu, Nov 30, 2017 at 05:54:35PM +0800, Liu, Yi L wrote:
> On Thu, Nov 30, 2017 at 05:11:55PM +0800, Peter Xu wrote:
> > On Thu, Nov 30, 2017 at 01:22:38PM +0800, Liu, Yi L wrote:
> > > On Tue, Nov 14, 2017 at 06:13:50PM -0500, prasad.singamse...@oracle.com 
> > > wrote:
> > > > From: Prasad Singamsetty 
> > > > 
> > > > The current implementation of Intel IOMMU code only supports 39 bits
> > > > iova address width. This patch provides a new parameter (x-aw-bits)
> > > > for intel-iommu to extend its address width to 48 bits but keeping the
> > > > default the same (39 bits). The reason for not changing the default
> > > > is to avoid potential compatibility problems with live migration of
> > > > intel-iommu enabled QEMU guest. The only valid values for 'x-aw-bits'
> > > > parameter are 39 and 48.
> > > > 
> > > > After enabling larger address width (48), we should be able to map
> > > > larger iova addresses in the guest. For example, a QEMU guest that
> > > > is configured with large memory ( >=1TB ). To check whether 48 bits
> > > 
> > > I didn't quite get your point here. Address width limits the iova range,
> > > but it doesn't limit the guest memory range. e.g. you can use 39 bit iova
> > > address to access a guest physical address larger than (2^39 - 1) as long
> > > as the guest 2nd level page table is well programmed. Only one exception,
> > > if you requires a continuous iova range(e.g. 2^39), it would be an issue.
> > > Not sure if this is the major motivation of your patch? However, I'm not
> > > against extend the address width to be 48 bits. Just need to make it clear
> > > here.
> > 
> > One thing I can think of is the identity mapping. Say, when iommu=pt
> > is set in guest, meanwhile when PT capability is not supported from
> > hardware (here I mean, the emulated hardware, of course), guest kernel
> > will create one identity mapping to emulate the PT mode.
> 
> True.
>  
> > Current linux kernel's identity mapping should be a static 48 bits
> > mapping (it must cover the whole memory range of guest), so if we
> 
> I suppose guest memory range depends on the AW reported by CPUID? Not sure
> if it is constantly 48 bits.

Please refer to si_domain_init() and DEFAULT_DOMAIN_ADDRESS_WIDTH.

> 
> > provide a 39 bits vIOMMU to the guest, AFAIU we'll fail at device
> > attaching to that identity domain of every single device that is
> > protected by that 39 bits vIOMMU (kernel will find that domain gaw is
> > bigger than vIOMMU supported gaw of that device).
> 
> Yeah, this is a good argue. As it is 1:1 mapping, the translated address
> is limited all the same.
> 
> > I do see no good fix for that, except boost the supported gaw to
> > bigger ones.
> 
> How about defaultly expose cap.PT bit? In that case, there will no 1:1
> mapping in guest side. Translation is skipped. So the IOMMU AW won't
> limit the addressing.

PT has already been on by default since the first day it was there. :)

-- 
Peter Xu



Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2017-12-01 Thread Michael S. Tsirkin
On Thu, Nov 30, 2017 at 10:33:50AM -0800, Prasad Singamsetty wrote:
> 
> 
> On 11/29/2017 7:25 PM, Peter Xu wrote:
> > On Wed, Nov 29, 2017 at 01:05:22PM -0800, Prasad Singamsetty wrote:
> > > Thanks Michael. Some comments below.
> > > 
> > > On 11/28/2017 9:32 AM, Michael S. Tsirkin wrote:
> > > > On Tue, Nov 14, 2017 at 06:13:50PM -0500, prasad.singamse...@oracle.com 
> > > > wrote:
> > > > > From: Prasad Singamsetty 
> > > > > 
> > > > > The current implementation of Intel IOMMU code only supports 39 bits
> > > > > iova address width. This patch provides a new parameter (x-aw-bits)
> > > > > for intel-iommu to extend its address width to 48 bits but keeping the
> > > > > default the same (39 bits). The reason for not changing the default
> > > > > is to avoid potential compatibility problems
> > > > 
> > > > You can change the default, just make it 39 for existing machine types.
> > > 
> > > I think introducing a new machine type is not appropriate as this
> > > is an implementation limitation for the existing machine type.
> > > Currently q35 is the only machine type that supports intel-iommu.
> > > And we want to retain the current default behavior for q35 to avoid
> > > any new issues with live migration.
> > 
> > I guess "existing machine type" means e.g. pc-q35-2.11 and older ones,
> > rather than creating another machine type in parallel with q35.  So we
> > can set 48 bits as default on upcoming pc-q35-2.12 machines, while
> > keep the 39 bits on the old ones.
> > 
> > Please refer to include/hw/compat.h.
> 
> Thanks Peter, for the clarification and pointer to this. I am still
> new to this but learning on how this works or how this is used in
> use cases like Live Migration.
> 
> Are you suggesting that we change the default to 48 bits in the
> next release (2.12)?
> 
> User need to specify an older machine type  (pc-q35-2.11 or older)
> to get the old default value of 39 bits. This still requires the
> patch I proposed to support compatibility for older releases
> except the introduction of the new property (x-aw-bits).

Yes. If you see a reason for users to limit it to 39 bits,
we can make it a supported property (not starting
with x-). If it's only for live migration, we can
use a non-supported property (with x-).

> 
> > 
> > > 
> > > > 
> > > > > with live migration of
> > > > > intel-iommu enabled QEMU guest. The only valid values for 'x-aw-bits'
> > > > > parameter are 39 and 48.
> > > > 
> > > > I'd rather make it a boolean then.
> > > 
> > > Right. It seems Intel already has additional sizes supported so keeping
> > > it as an integer seems better.
> > 
> > Yes, considering that 5-level IOMMUs are coming (AFAIK).
> > 
> 
> If we change the default value to 48 bits, I assume there is
> no need for this property and user is expected to use an
> older machine type based on the release to get the old
> default. Is this correct?
> 
> Thanks.
> --Prasad


No, users use an older machine type in order to support migration from
old machine types. If someone might actually want 39 bits for
some other reason, we need it as a supported property.



Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2017-12-01 Thread Liu, Yi L
On Tue, Nov 14, 2017 at 06:13:50PM -0500, prasad.singamse...@oracle.com wrote:
> From: Prasad Singamsetty 
> 
> The current implementation of Intel IOMMU code only supports 39 bits
> iova address width. This patch provides a new parameter (x-aw-bits)
> for intel-iommu to extend its address width to 48 bits but keeping the
> default the same (39 bits). The reason for not changing the default
> is to avoid potential compatibility problems with live migration of
> intel-iommu enabled QEMU guest. The only valid values for 'x-aw-bits'
> parameter are 39 and 48.
> 
> After enabling larger address width (48), we should be able to map
> larger iova addresses in the guest. For example, a QEMU guest that
> is configured with large memory ( >=1TB ). To check whether 48 bits
> aw is enabled, we can grep in the guest dmesg output with line:
> "DMAR: Host address width 48".
> 
> Signed-off-by: Prasad Singamsetty 

Prasad,

Have you tested the scenario with physical device assigned to a guest?

Regards,
Yi L
> ---
>  hw/i386/acpi-build.c   |   3 +-
>  hw/i386/intel_iommu.c  | 101 
> -
>  hw/i386/intel_iommu_internal.h |   9 ++--
>  include/hw/i386/intel_iommu.h  |   1 +
>  4 files changed, 65 insertions(+), 49 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 73519ab3ac..537957c89a 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -2460,6 +2460,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
>  AcpiDmarDeviceScope *scope = NULL;
>  /* Root complex IOAPIC use one path[0] only */
>  size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]);
> +IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu);
>  
>  assert(iommu);
>  if (iommu->intr_supported) {
> @@ -2467,7 +2468,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
>  }
>  
>  dmar = acpi_data_push(table_data, sizeof(*dmar));
> -dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1;
> +dmar->host_address_width = intel_iommu->aw_bits - 1;
>  dmar->flags = dmar_flags;
>  
>  /* DMAR Remapping Hardware Unit Definition structure */
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 53b3bf244d..c2380fdfdc 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -521,9 +521,9 @@ static inline dma_addr_t 
> vtd_ce_get_slpt_base(VTDContextEntry *ce)
>  return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
>  }
>  
> -static inline uint64_t vtd_get_slpte_addr(uint64_t slpte)
> +static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw)
>  {
> -return slpte & VTD_SL_PT_BASE_ADDR_MASK(VTD_HOST_ADDRESS_WIDTH);
> +return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw);
>  }
>  
>  /* Whether the pte indicates the address of the page frame */
> @@ -608,20 +608,21 @@ static inline bool vtd_ce_type_check(X86IOMMUState 
> *x86_iommu,
>  return true;
>  }
>  
> -static inline uint64_t vtd_iova_limit(VTDContextEntry *ce)
> +static inline uint64_t vtd_iova_limit(VTDContextEntry *ce, uint8_t aw)
>  {
>  uint32_t ce_agaw = vtd_ce_get_agaw(ce);
> -return 1ULL << MIN(ce_agaw, VTD_MGAW);
> +return 1ULL << MIN(ce_agaw, aw);
>  }
>  
>  /* Return true if IOVA passes range check, otherwise false. */
> -static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce)
> +static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce,
> +uint8_t aw)
>  {
>  /*
>   * Check if @iova is above 2^X-1, where X is the minimum of MGAW
>   * in CAP_REG and AW in context-entry.
>   */
> -return !(iova & ~(vtd_iova_limit(ce) - 1));
> +return !(iova & ~(vtd_iova_limit(ce, aw) - 1));
>  }
>  
>  /*
> @@ -669,7 +670,7 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState 
> *s, uint8_t bus_num)
>   */
>  static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool 
> is_write,
>   uint64_t *slptep, uint32_t *slpte_level,
> - bool *reads, bool *writes)
> + bool *reads, bool *writes, uint8_t aw_bits)
>  {
>  dma_addr_t addr = vtd_ce_get_slpt_base(ce);
>  uint32_t level = vtd_ce_get_level(ce);
> @@ -677,7 +678,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, 
> uint64_t iova, bool is_write,
>  uint64_t slpte;
>  uint64_t access_right_check;
>  
> -if (!vtd_iova_range_check(iova, ce)) {
> +if (!vtd_iova_range_check(iova, ce, aw_bits)) {
>  trace_vtd_err_dmar_iova_overflow(iova);
>  return -VTD_FR_ADDR_BEYOND_MGAW;
>  }
> @@ -714,7 +715,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, 
> uint64_t iova, bool is_write,
>  *slpte_level = level;
>  return 0;
>  }
> -addr = vtd_get_slpte_addr(slpte);
> +addr = vtd_get_slpte_addr(slpte, aw_bits);
>  level--;
>  }
>  }
> @@ -732,11 +733,12 @@ t

Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2017-12-01 Thread Prasad Singamsetty



On 11/30/2017 10:56 AM, Michael S. Tsirkin wrote:

On Thu, Nov 30, 2017 at 10:33:50AM -0800, Prasad Singamsetty wrote:



On 11/29/2017 7:25 PM, Peter Xu wrote:

On Wed, Nov 29, 2017 at 01:05:22PM -0800, Prasad Singamsetty wrote:

Thanks Michael. Some comments below.

On 11/28/2017 9:32 AM, Michael S. Tsirkin wrote:

On Tue, Nov 14, 2017 at 06:13:50PM -0500, prasad.singamse...@oracle.com wrote:

From: Prasad Singamsetty 

The current implementation of Intel IOMMU code only supports 39 bits
iova address width. This patch provides a new parameter (x-aw-bits)
for intel-iommu to extend its address width to 48 bits but keeping the
default the same (39 bits). The reason for not changing the default
is to avoid potential compatibility problems


You can change the default, just make it 39 for existing machine types.


I think introducing a new machine type is not appropriate as this
is an implementation limitation for the existing machine type.
Currently q35 is the only machine type that supports intel-iommu.
And we want to retain the current default behavior for q35 to avoid
any new issues with live migration.


I guess "existing machine type" means e.g. pc-q35-2.11 and older ones,
rather than creating another machine type in parallel with q35.  So we
can set 48 bits as default on upcoming pc-q35-2.12 machines, while
keep the 39 bits on the old ones.

Please refer to include/hw/compat.h.


Thanks Peter, for the clarification and pointer to this. I am still
new to this but learning on how this works or how this is used in
use cases like Live Migration.

Are you suggesting that we change the default to 48 bits in the
next release (2.12)?

User need to specify an older machine type  (pc-q35-2.11 or older)
to get the old default value of 39 bits. This still requires the
patch I proposed to support compatibility for older releases
except the introduction of the new property (x-aw-bits).


Yes. If you see a reason for users to limit it to 39 bits,
we can make it a supported property (not starting
with x-). If it's only for live migration, we can
use a non-supported property (with x-).

I think it is only for the Live Migration case that we need to
support the old default value of 39 bits.

Do you see any need to keep a non-supported property?
It may be useful for developers.












with live migration of
intel-iommu enabled QEMU guest. The only valid values for 'x-aw-bits'
parameter are 39 and 48.


I'd rather make it a boolean then.


Right. It seems Intel already has additional sizes supported so keeping
it as an integer seems better.


Yes, considering that 5-level IOMMUs are coming (AFAIK).



If we change the default value to 48 bits, I assume there is
no need for this property and user is expected to use an
older machine type based on the release to get the old
default. Is this correct?

Thanks.
--Prasad



No, users use an older machine type to get migration from old
machine types. If someone might actually want 39 bit for
some reason, we need it as a supported property.


OK. Other than the Live Migration case, I don't see a need for a
supported property.

Thanks.
--Prasad



Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2017-12-01 Thread Peter Xu
On Thu, Nov 30, 2017 at 11:12:48AM -0800, Prasad Singamsetty wrote:
> 
> 
> On 11/30/2017 10:56 AM, Michael S. Tsirkin wrote:
> > On Thu, Nov 30, 2017 at 10:33:50AM -0800, Prasad Singamsetty wrote:
> > > 
> > > 
> > > On 11/29/2017 7:25 PM, Peter Xu wrote:
> > > > On Wed, Nov 29, 2017 at 01:05:22PM -0800, Prasad Singamsetty wrote:
> > > > > Thanks Michael. Some comments below.
> > > > > 
> > > > > On 11/28/2017 9:32 AM, Michael S. Tsirkin wrote:
> > > > > > On Tue, Nov 14, 2017 at 06:13:50PM -0500, 
> > > > > > prasad.singamse...@oracle.com wrote:
> > > > > > > From: Prasad Singamsetty 
> > > > > > > 
> > > > > > > The current implementation of Intel IOMMU code only supports 39 
> > > > > > > bits
> > > > > > > iova address width. This patch provides a new parameter 
> > > > > > > (x-aw-bits)
> > > > > > > for intel-iommu to extend its address width to 48 bits but 
> > > > > > > keeping the
> > > > > > > default the same (39 bits). The reason for not changing the 
> > > > > > > default
> > > > > > > is to avoid potential compatibility problems
> > > > > > 
> > > > > > You can change the default, just make it 39 for existing machine 
> > > > > > types.
> > > > > 
> > > > > I think introducing a new machine type is not appropriate as this
> > > > > is an implementation limitation for the existing machine type.
> > > > > Currently q35 is the only machine type that supports intel-iommu.
> > > > > And we want to retain the current default behavior for q35 to avoid
> > > > > any new issues with live migration.
> > > > 
> > > > I guess "existing machine type" means e.g. pc-q35-2.11 and older ones,
> > > > rather than creating another machine type in parallel with q35.  So we
> > > > can set 48 bits as default on upcoming pc-q35-2.12 machines, while
> > > > keep the 39 bits on the old ones.
> > > > 
> > > > Please refer to include/hw/compat.h.
> > > 
> > > Thanks Peter, for the clarification and pointer to this. I am still
> > > new to this but learning on how this works or how this is used in
> > > use cases like Live Migration.

You can refer to a similar commit, such as 048a2e8869.

But I think I was wrong - you had better add this to
include/hw/i386/pc.h (see e.g. PC_COMPAT_2_10) rather than compat.h,
since Intel vIOMMU is only used for PC machines.

And... you may also need to create that PC_COMPAT_2_11 macro after
2.11 is released.  For that you can refer to a6fd5b0e050a.

Anyway, I think this "set default" work can be postponed until after the
upcoming release, and can be done as separate work outside the current series.

> > > 
> > > Are you suggesting that we change the default to 48 bits in the
> > > next release (2.12)?
> > > 
> > > User need to specify an older machine type  (pc-q35-2.11 or older)
> > > to get the old default value of 39 bits. This still requires the
> > > patch I proposed to support compatibility for older releases
> > > except the introduction of the new property (x-aw-bits).
> > 
> > Yes. If you see a reason for users to limit it to 39 bits,
> > we can make it a supported property (not starting
> > with x-). If it's only for live migration, we can
> > use a non-supported property (with x-).
> I think it is only for Live Migration case, we need to
> support the old default value of 39 bits.
> 
> Do you see any need to keep a non-supported property?
> It may be useful for developers.

IMHO if it's for developers x-* would be good enough.  Thanks,

-- 
Peter Xu



Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2017-12-01 Thread Michael S. Tsirkin
On Fri, Dec 01, 2017 at 09:02:30AM -0800, Prasad Singamsetty wrote:
> 
> 
> On 11/30/2017 8:43 PM, Peter Xu wrote:
> > On Thu, Nov 30, 2017 at 11:12:48AM -0800, Prasad Singamsetty wrote:
> > > 
> > > 
> > > On 11/30/2017 10:56 AM, Michael S. Tsirkin wrote:
> > > > On Thu, Nov 30, 2017 at 10:33:50AM -0800, Prasad Singamsetty wrote:
> > > > > 
> > > > > 
> > > > > On 11/29/2017 7:25 PM, Peter Xu wrote:
> > > > > > On Wed, Nov 29, 2017 at 01:05:22PM -0800, Prasad Singamsetty wrote:
> > > > > > > Thanks Michael. Some comments below.
> > > > > > > 
> > > > > > > On 11/28/2017 9:32 AM, Michael S. Tsirkin wrote:
> > > > > > > > On Tue, Nov 14, 2017 at 06:13:50PM -0500, 
> > > > > > > > prasad.singamse...@oracle.com wrote:
> > > > > > > > > From: Prasad Singamsetty 
> > > > > > > > > 
> > > > > > > > > The current implementation of Intel IOMMU code only supports 
> > > > > > > > > 39 bits
> > > > > > > > > iova address width. This patch provides a new parameter 
> > > > > > > > > (x-aw-bits)
> > > > > > > > > for intel-iommu to extend its address width to 48 bits but 
> > > > > > > > > keeping the
> > > > > > > > > default the same (39 bits). The reason for not changing the 
> > > > > > > > > default
> > > > > > > > > is to avoid potential compatibility problems
> > > > > > > > 
> > > > > > > > You can change the default, just make it 39 for existing 
> > > > > > > > machine types.
> > > > > > > 
> > > > > > > I think introducing a new machine type is not appropriate as this
> > > > > > > is an implementation limitation for the existing machine type.
> > > > > > > Currently q35 is the only machine type that supports intel-iommu.
> > > > > > > And we want to retain the current default behavior for q35 to 
> > > > > > > avoid
> > > > > > > any new issues with live migration.
> > > > > > 
> > > > > > I guess "existing machine type" means e.g. pc-q35-2.11 and older 
> > > > > > ones,
> > > > > > rather than creating another machine type in parallel with q35.  So 
> > > > > > we
> > > > > > can set 48 bits as default on upcoming pc-q35-2.12 machines, while
> > > > > > keep the 39 bits on the old ones.
> > > > > > 
> > > > > > Please refer to include/hw/compat.h.
> > > > > 
> > > > > Thanks Peter, for the clarification and pointer to this. I am still
> > > > > new to this but learning on how this works or how this is used in
> > > > > use cases like Live Migration.
> > 
> > You can refer to similar commit, like 048a2e8869.
> > 
> > But, I think I was wrong - you should better do the addition to
> > include/hw/i386/pc.h (see e.g. PC_COMPAT_2_10) rather than compat.h,
> > since Intel vIOMMU is only used for PC machines.
> 
> Thanks, Peter. That sounds good. We can add the compatibility
> default value to PC_COMPAT_2_11. How does it work for older
> machine types like PC_COMPAT_2_10 and older?

It's inherited.

> > 
> > And... you may also need to create that PC_COMPAT_2_11 macro after
> > 2.11 is released.  For that you can refer to a6fd5b0e050a.
> > 
> > Anyway, I think this "set default" work can be postponed after recent
> > release, which can be a separate work besides current series.
> 
> OK. To be clear, are you suggesting that we can change the default
> value to 48 bits as a separate patch and not include it with the
> current patch set?
> 
> > 
> > > > > 
> > > > > Are you suggesting that we change the default to 48 bits in the
> > > > > next release (2.12)?
> > > > > 
> > > > > User need to specify an older machine type  (pc-q35-2.11 or older)
> > > > > to get the old default value of 39 bits. This still requires the
> > > > > patch I proposed to support compatibility for older releases
> > > > > except the introduction of the new property (x-aw-bits).
> > > > 
> > > > Yes. If you see a reason for users to limit it to 39 bits,
> > > > we can make it a supported property (not starting
> > > > with x-). If it's only for live migration, we can
> > > > use a non-supported property (with x-).
> > > I think it is only for Live Migration case, we need to
> > > support the old default value of 39 bits.
> > > 
> > > Do you see any need to keep a non-supported property?
> > > It may be useful for developers.
> > 
> > IMHO if it's for developers x-* would be good enough.  Thanks,
> > 
> 
> Sounds good.
> 
> Thanks.
> --Prasad



Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2017-12-01 Thread Prasad Singamsetty



On 11/29/2017 7:25 PM, Peter Xu wrote:

On Wed, Nov 29, 2017 at 01:05:22PM -0800, Prasad Singamsetty wrote:

Thanks Michael. Some comments below.

On 11/28/2017 9:32 AM, Michael S. Tsirkin wrote:

On Tue, Nov 14, 2017 at 06:13:50PM -0500, prasad.singamse...@oracle.com wrote:

From: Prasad Singamsetty 

The current implementation of Intel IOMMU code only supports 39 bits
iova address width. This patch provides a new parameter (x-aw-bits)
for intel-iommu to extend its address width to 48 bits but keeping the
default the same (39 bits). The reason for not changing the default
is to avoid potential compatibility problems


You can change the default, just make it 39 for existing machine types.


I think introducing a new machine type is not appropriate as this
is an implementation limitation for the existing machine type.
Currently q35 is the only machine type that supports intel-iommu.
And we want to retain the current default behavior for q35 to avoid
any new issues with live migration.


I guess "existing machine type" means e.g. pc-q35-2.11 and older ones,
rather than creating another machine type in parallel with q35.  So we
can set 48 bits as default on upcoming pc-q35-2.12 machines, while
keep the 39 bits on the old ones.

Please refer to include/hw/compat.h.


Thanks Peter, for the clarification and pointer to this. I am still
new to this but learning on how this works or how this is used in
use cases like Live Migration.

Are you suggesting that we change the default to 48 bits in the
next release (2.12)?

User need to specify an older machine type  (pc-q35-2.11 or older)
to get the old default value of 39 bits. This still requires the
patch I proposed to support compatibility for older releases
except the introduction of the new property (x-aw-bits).









with live migration of
intel-iommu enabled QEMU guest. The only valid values for 'x-aw-bits'
parameter are 39 and 48.


I'd rather make it a boolean then.


Right. It seems Intel already has additional sizes supported so keeping
it as an integer seems better.


Yes, considering that 5-level IOMMUs are coming (AFAIK).



If we change the default value to 48 bits, I assume there is
no need for this property and user is expected to use an
older machine type based on the release to get the old
default. Is this correct?

Thanks.
--Prasad




Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2017-12-01 Thread Prasad Singamsetty



On 11/30/2017 8:43 PM, Peter Xu wrote:

On Thu, Nov 30, 2017 at 11:12:48AM -0800, Prasad Singamsetty wrote:



On 11/30/2017 10:56 AM, Michael S. Tsirkin wrote:

On Thu, Nov 30, 2017 at 10:33:50AM -0800, Prasad Singamsetty wrote:



On 11/29/2017 7:25 PM, Peter Xu wrote:

On Wed, Nov 29, 2017 at 01:05:22PM -0800, Prasad Singamsetty wrote:

Thanks Michael. Some comments below.

On 11/28/2017 9:32 AM, Michael S. Tsirkin wrote:

On Tue, Nov 14, 2017 at 06:13:50PM -0500, prasad.singamse...@oracle.com wrote:

From: Prasad Singamsetty 

The current implementation of Intel IOMMU code only supports 39 bits
iova address width. This patch provides a new parameter (x-aw-bits)
for intel-iommu to extend its address width to 48 bits but keeping the
default the same (39 bits). The reason for not changing the default
is to avoid potential compatibility problems


You can change the default, just make it 39 for existing machine types.


I think introducing a new machine type is not appropriate as this
is an implementation limitation for the existing machine type.
Currently q35 is the only machine type that supports intel-iommu.
And we want to retain the current default behavior for q35 to avoid
any new issues with live migration.


I guess "existing machine type" means e.g. pc-q35-2.11 and older ones,
rather than creating another machine type in parallel with q35.  So we
can set 48 bits as default on upcoming pc-q35-2.12 machines, while
keep the 39 bits on the old ones.

Please refer to include/hw/compat.h.


Thanks Peter, for the clarification and pointer to this. I am still
new to this but learning on how this works or how this is used in
use cases like Live Migration.


You can refer to similar commit, like 048a2e8869.

But, I think I was wrong - you should better do the addition to
include/hw/i386/pc.h (see e.g. PC_COMPAT_2_10) rather than compat.h,
since Intel vIOMMU is only used for PC machines.


Thanks, Peter. That sounds good. We can add the compatibility
default value to PC_COMPAT_2_11. How does it work for older
machine types like PC_COMPAT_2_10 and older?



And... you may also need to create that PC_COMPAT_2_11 macro after
2.11 is released.  For that you can refer to a6fd5b0e050a.

Anyway, I think this "set default" work can be postponed after recent
release, which can be a separate work besides current series.


OK. To be clear, are you suggesting that we can change the default
value to 48 bits as a separate patch and not include it with the
current patch set?





Are you suggesting that we change the default to 48 bits in the
next release (2.12)?

User need to specify an older machine type  (pc-q35-2.11 or older)
to get the old default value of 39 bits. This still requires the
patch I proposed to support compatibility for older releases
except the introduction of the new property (x-aw-bits).


Yes. If you see a reason for users to limit it to 39 bits,
we can make it a supported property (not starting
with x-). If it's only for live migration, we can
use a non-supported property (with x-).

I think it is only for Live Migration case, we need to
support the old default value of 39 bits.

Do you see any need to keep a non-supported property?
It may be useful for developers.


IMHO if it's for developers x-* would be good enough.  Thanks,



Sounds good.

Thanks.
--Prasad



Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2017-12-03 Thread Peter Xu
On Fri, Dec 01, 2017 at 09:02:30AM -0800, Prasad Singamsetty wrote:

[...]

> 
> > 
> > And... you may also need to create that PC_COMPAT_2_11 macro after
> > 2.11 is released.  For that you can refer to a6fd5b0e050a.
> > 
> > Anyway, I think this "set default" work can be postponed after recent
> > release, which can be a separate work besides current series.
> 
> OK. To be clear, are you suggesting that we can change the default
> value to 48 bits as a separate patch and not include it with the
> current patch set?

Yes.  Thanks,

-- 
Peter Xu



Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2018-01-10 Thread Prasad Singamsetty


Hi Yi L,

On 12/1/2017 3:29 AM, Liu, Yi L wrote:

On Tue, Nov 14, 2017 at 06:13:50PM -0500, prasad.singamse...@oracle.com wrote:

From: Prasad Singamsetty 

The current implementation of Intel IOMMU code only supports 39 bits
iova address width. This patch provides a new parameter (x-aw-bits)
for intel-iommu to extend its address width to 48 bits but keeping the
default the same (39 bits). The reason for not changing the default
is to avoid potential compatibility problems with live migration of
intel-iommu enabled QEMU guest. The only valid values for 'x-aw-bits'
parameter are 39 and 48.

After enabling larger address width (48), we should be able to map
larger iova addresses in the guest. For example, a QEMU guest that
is configured with large memory ( >=1TB ). To check whether 48 bits
aw is enabled, we can grep in the guest dmesg output with line:
"DMAR: Host address width 48".

Signed-off-by: Prasad Singamsetty 


Prasad,

Have you tested the scenario with physical device assigned to a guest?


Sorry for the long delay in following up on this.

I did some testing with vfio-pci devices assigned to the guest.
This is done on the latest qemu code base (2.11.50).

Here are the test cases/results:

1. Booting VM with one or two vfio-pci (network) devices
   and multiple memory size configs (up to 256G). Assigned pci
   devices (network interfaces) worked fine and no issues
   in using these devices. This test is run for both address
   widths (39 and 48).
2. If the guest VM is configured to use 512G and the address
   width is the default 39 bits, then the guest OS fails to
   boot due to DMA failures. The same is observed without
   applying the patch set. The guest OS ends up booting into
   the dracut shell. This problem is not seen if we set the address
   width to 48 bits. So, the patch set addresses a latent bug
   with large memory configurations.

ISSUE - VM could take long time to boot with vfio-pci devices

The QEMU process can take a long time to initialize the VM
when a vfio-pci device is configured, depending on the
memory size. For small memory sizes (less than 32G) the delay is
not noticeable (<30s). For larger memory sizes, the delay ranges
from a few minutes to much longer (2-40min). For more than 512G, the
QEMU process appears to hang but can be interrupted. This behavior
is also observed without the patch set applied. The slowness is due
to the VFIO_IOMMU_MAP_DMA ioctl taking a long time to map the
system RAM assigned to the guest. This happens while the QEMU process
is initializing the vfio device, when it maps all of the assigned
RAM regions. Here is the stack trace from gdb:

#0  vfio_dma_map (container=0x582709d0, iova=4294967296,
  size=547608330240, vaddr=0x7f7fd3e0,
  readonly=false)
at /home/psingams/qemu-upstream-v2/hw/vfio/common.c:250
#1  0x5584f471 in vfio_listener_region_add(
  listener=0x582709e0,
  section=0x7fffc7f0)
at /home/psingams/qemu-upstream-v2/hw/vfio/common.c:521
#2  0x557f08fc in listener_add_address_space (
  listener=0x582709e0, as=0x5813b790)
at /home/psingams/qemu-upstream-v2/memory.c:2600
#3  0x557f0bbe in memory_listener_register (
  listener=0x582709e0, as=0x5813b790)
at /home/psingams/qemu-upstream-v2/memory.c:2643
#4  0x558511ef in vfio_connect_container (group=0x58270960,
  as=0x5813b790, errp=0x7fffdae8)
at /home/psingams/qemu-upstream-v2/hw/vfio/common.c:1130

(gdb) print/x size
$2 = 0x7f80000000

This is before guest OS gets to boot. The host is running 4.15.0-rc6
kernel with qemu version 2.11.50.

I am not sure whether this is a known issue, or whether someone is already
working on fixing the implementation of the VFIO_IOMMU_MAP_DMA ioctl.

This issue is not related to this patch set and needs to be
investigated separately.

Please let me know if there are other comments on this patch set.

Regards,
--Prasad



Regards,
Yi L

---
  hw/i386/acpi-build.c   |   3 +-
  hw/i386/intel_iommu.c  | 101 -
  hw/i386/intel_iommu_internal.h |   9 ++--
  include/hw/i386/intel_iommu.h  |   1 +
  4 files changed, 65 insertions(+), 49 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 73519ab3ac..537957c89a 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2460,6 +2460,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
  AcpiDmarDeviceScope *scope = NULL;
  /* Root complex IOAPIC use one path[0] only */
  size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]);
+IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu);
  
  assert(iommu);

  if (iommu->intr_supported) {
@@ -2467,7 +2468,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
  }
  
  dmar = acpi_data_push(table_data, sizeof(*dmar));

-dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1;
+dmar->host_ad

Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2018-01-10 Thread Liu, Yi L
> -Original Message-
> From: Qemu-devel [mailto:qemu-devel-bounces+yi.l.liu=intel@nongnu.org] On
> Behalf Of Prasad Singamsetty
> Sent: Thursday, January 11, 2018 8:06 AM
> To: Liu, Yi L 
> Cc: ehabk...@redhat.com; m...@redhat.com; konrad.w...@oracle.com; qemu-
> de...@nongnu.org; pet...@redhat.com; imamm...@redhat.com;
> pbonz...@redhat.com; r...@twiddle.net
> Subject: Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 
> 48
> bits
> 
> 
> Hi Yi L,
> 
> On 12/1/2017 3:29 AM, Liu, Yi L wrote:
> > On Tue, Nov 14, 2017 at 06:13:50PM -0500, prasad.singamse...@oracle.com
> wrote:
> >> From: Prasad Singamsetty 
> >>
> >> The current implementation of Intel IOMMU code only supports 39 bits
> >> iova address width. This patch provides a new parameter (x-aw-bits)
> >> for intel-iommu to extend its address width to 48 bits but keeping
> >> the default the same (39 bits). The reason for not changing the
> >> default is to avoid potential compatibility problems with live
> >> migration of intel-iommu enabled QEMU guest. The only valid values for 
> >> 'x-aw-
> bits'
> >> parameter are 39 and 48.
> >>
> >> After enabling larger address width (48), we should be able to map
> >> larger iova addresses in the guest. For example, a QEMU guest that is
> >> configured with large memory ( >=1TB ). To check whether 48 bits aw
> >> is enabled, we can grep in the guest dmesg output with line:
> >> "DMAR: Host address width 48".
> >>
> >> Signed-off-by: Prasad Singamsetty 
> >
> > Prasad,
> >
> > Have you tested the scenario with physical device assigned to a guest?
> 
> Sorry for the long delay in following up on this.
> 
> I did some testing with vfio-pci devices assigned to the guest.
> This is done on the latest qemu code base (2.11.50).
> 
> Here are the test cases/results:
> 
> 1. Booting VM with one or two vfio-pci (network) devices
> and multiple memory size configs (up to 256G). Assigned pci
> devices (network interfaces) worked fine and no issues
> in using these devices. This test is run for both address
> widths (39 and 48).
> 2. If the guest VM is configured to use 512G and address
> width is the default 39 bits then guest OS fails to
> boot due to DMA failures. The same is observed without
> applying the patch set. The guest OS ends up booting into
> dracut shell. This problem is not seen if we set the address
> width to 48 bits. So, the patch set addresses a latent bug
> with large memory config.
> 
> ISSUE - VM could take long time to boot with vfio-pci devices
> 
> The QEMU process can take a long time to initialize the VM when a vfio-pci
> device is configured, depending on the memory size. For small memory sizes
> (less than 32G) the delay is not noticeable (<30s). For larger memory sizes,
> the delay ranges from a few minutes to much longer (2-40min). For more than
> 512G, the QEMU process appears to hang but can be interrupted. This behavior
> is also observed without the patch set applied. The slowness is due to the
> VFIO_IOMMU_MAP_DMA ioctl taking a long time to map the system RAM assigned
> to the guest. This happens while the QEMU process is initializing the vfio
> device, where it maps all the assigned RAM memory regions. Here is the stack
> trace from gdb:
> 
> #0  vfio_dma_map (container=0x582709d0, iova=4294967296,
>size=547608330240, vaddr=0x7f7fd3e0,
>readonly=false)
>  at /home/psingams/qemu-upstream-v2/hw/vfio/common.c:250
> #1  0x5584f471 in vfio_listener_region_add(
>listener=0x582709e0,
>section=0x7fffc7f0)
>  at /home/psingams/qemu-upstream-v2/hw/vfio/common.c:521
> #2  0x557f08fc in listener_add_address_space (
>listener=0x582709e0, as=0x5813b790)
>  at /home/psingams/qemu-upstream-v2/memory.c:2600
> #3  0x557f0bbe in memory_listener_register (
>listener=0x582709e0, as=0x5813b790)
>  at /home/psingams/qemu-upstream-v2/memory.c:2643
> #4  0x558511ef in vfio_connect_container (group=0x58270960,
>as=0x5813b790, errp=0x7fffdae8)
>  at /home/psingams/qemu-upstream-v2/hw/vfio/common.c:1130
> 
> (gdb) print/x size
> $2 = 0x7f80000000
> 
> This is before guest OS gets to boot. The host is running 4.15.0-rc6 kernel 
> with qemu
> version 2.11.50.
> 
> I am not sure if this is a known issue and someone is already working on 
> fixing the
> implementation of VFIO_IOMMU_MAP_DMA ioctl.

It seems to be the same issue as the one reported by Bob.
https://lists.gnu.org/archive/html/qemu-devel/2017-12/msg05098.html

After chatting with them, the cause appears to be insufficient memory in the
host. How much memory does your host have?

> This issue is not related to this patch set and needs to be investigated
> separately.
> 
> Please let me know if there are other comments on this patch set.
> 

Regards,
Yi L


Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48 bits

2018-01-11 Thread Prasad Singamsetty



On 1/10/2018 6:46 PM, Liu, Yi L wrote:

-Original Message-
From: Qemu-devel [mailto:qemu-devel-bounces+yi.l.liu=intel@nongnu.org] On
Behalf Of Prasad Singamsetty
Sent: Thursday, January 11, 2018 8:06 AM
To: Liu, Yi L 
Cc: ehabk...@redhat.com; m...@redhat.com; konrad.w...@oracle.com; qemu-
de...@nongnu.org; pet...@redhat.com; imamm...@redhat.com;
pbonz...@redhat.com; r...@twiddle.net
Subject: Re: [Qemu-devel] [PATCH v1 2/2] intel-iommu: Extend address width to 48
bits


Hi Yi L,

On 12/1/2017 3:29 AM, Liu, Yi L wrote:

On Tue, Nov 14, 2017 at 06:13:50PM -0500, prasad.singamse...@oracle.com

wrote:

From: Prasad Singamsetty 

The current implementation of Intel IOMMU code only supports 39 bits
iova address width. This patch provides a new parameter (x-aw-bits)
for intel-iommu to extend its address width to 48 bits but keeping
the default the same (39 bits). The reason for not changing the
default is to avoid potential compatibility problems with live
migration of intel-iommu enabled QEMU guest. The only valid values for 'x-aw-

bits'

parameter are 39 and 48.

After enabling larger address width (48), we should be able to map
larger iova addresses in the guest. For example, a QEMU guest that is
configured with large memory ( >=1TB ). To check whether 48 bits aw
is enabled, we can grep in the guest dmesg output with line:
"DMAR: Host address width 48".

Signed-off-by: Prasad Singamsetty 


Prasad,

Have you tested the scenario with physical device assigned to a guest?


Sorry for the long delay in following up on this.

I did some testing with vfio-pci devices assigned to the guest.
This is done on the latest qemu code base (2.11.50).

Here are the test cases/results:

1. Booting VM with one or two vfio-pci (network) devices
 and multiple memory size configs (up to 256G). Assigned pci
 devices (network interfaces) worked fine and no issues
 in using these devices. This test is run for both address
 widths (39 and 48).
2. If the guest VM is configured to use 512G and address
 width is the default 39 bits then guest OS fails to
 boot due to DMA failures. The same is observed without
 applying the patch set. The guest OS ends up booting into
 dracut shell. This problem is not seen if we set the address
 width to 48 bits. So, the patch set addresses a latent bug
 with large memory config.

ISSUE - VM could take long time to boot with vfio-pci devices

The QEMU process can take a long time to initialize the VM when a vfio-pci
device is configured, depending on the memory size. For small memory sizes
(less than 32G) the delay is not noticeable (<30s). For larger memory sizes,
the delay ranges from a few minutes to much longer (2-40min). For more than
512G, the QEMU process appears to hang but can be interrupted. This behavior
is also observed without the patch set applied. The slowness is due to the
VFIO_IOMMU_MAP_DMA ioctl taking a long time to map the system RAM assigned
to the guest. This happens while the QEMU process is initializing the vfio
device, where it maps all the assigned RAM memory regions. Here is the stack
trace from gdb:

#0  vfio_dma_map (container=0x582709d0, iova=4294967296,
size=547608330240, vaddr=0x7f7fd3e0,
readonly=false)
  at /home/psingams/qemu-upstream-v2/hw/vfio/common.c:250
#1  0x5584f471 in vfio_listener_region_add(
listener=0x582709e0,
section=0x7fffc7f0)
  at /home/psingams/qemu-upstream-v2/hw/vfio/common.c:521
#2  0x557f08fc in listener_add_address_space (
listener=0x582709e0, as=0x5813b790)
  at /home/psingams/qemu-upstream-v2/memory.c:2600
#3  0x557f0bbe in memory_listener_register (
listener=0x582709e0, as=0x5813b790)
  at /home/psingams/qemu-upstream-v2/memory.c:2643
#4  0x558511ef in vfio_connect_container (group=0x58270960,
as=0x5813b790, errp=0x7fffdae8)
  at /home/psingams/qemu-upstream-v2/hw/vfio/common.c:1130

(gdb) print/x size
$2 = 0x7f80000000

This is before guest OS gets to boot. The host is running 4.15.0-rc6 kernel 
with qemu
version 2.11.50.

I am not sure if this is a known issue and someone is already working on fixing 
the
implementation of VFIO_IOMMU_MAP_DMA ioctl.


It seems to be the same issue as the one reported by Bob.
https://lists.gnu.org/archive/html/qemu-devel/2017-12/msg05098.html

After chatting with them, the cause appears to be insufficient memory in the
host. How much memory does your host have?


The host system has 1.2TB memory and just one VM with one vfio-pci
device assigned to it. I don't think it is the same issue as not
enough memory.

Regards,
--Prasad




This issue is not related to this patch set and needs to be investigated
separately.

Please let me know if there are other comments on this patch set.



Regards,
Yi L