On 2017年04月17日 18:58, Peter Xu wrote:
Signed-off-by: Peter Xu <pet...@redhat.com>
---
hw/i386/intel_iommu.c | 109 +++++++++++++++++++++++++++++++++--------
hw/i386/intel_iommu_internal.h | 1 +
hw/i386/trace-events | 1 +
hw/i386/x86-iommu.c | 1 +
include/hw/i386/x86-iommu.h | 1 +
5 files changed, 93 insertions(+), 20 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 05ae631..deb2007 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -872,7 +872,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s,
uint8_t bus_num,
} else {
switch (vtd_ce_get_type(ce)) {
case VTD_CONTEXT_TT_MULTI_LEVEL:
- /* fall through */
+ case VTD_CONTEXT_TT_PASS_THROUGH:
case VTD_CONTEXT_TT_DEV_IOTLB:
break;
default:
@@ -883,6 +883,73 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s,
uint8_t bus_num,
return 0;
}
+/* Fetch translation type for specific device. Returns <0 if error
+ * happens, otherwise return the shifted type to check against
+ * VTD_CONTEXT_TT_*. */
+static int vtd_dev_get_trans_type(VTDAddressSpace *as)
+{
+ IntelIOMMUState *s;
+ VTDContextEntry ce;
+ int ret;
+
+ s = as->iommu_state;
+
+ ret = vtd_dev_to_context_entry(s, pci_bus_num(as->bus),
+ as->devfn, &ce);
+ if (ret) {
+ return ret;
+ }
+
+ return vtd_ce_get_type(&ce);
+}
+
+static bool vtd_dev_pt_enabled(VTDAddressSpace *as)
+{
+ int ret;
+
+ assert(as);
+
+ ret = vtd_dev_get_trans_type(as);
+ if (ret < 0) {
+ /*
+ * Possibly failed to parse the context entry for some reason
+ * (e.g., during init, or any guest configuration errors on
+ * context entries). We should assume PT not enabled for
+ * safety.
+ */
+ return false;
+ }
+
+ return ret == VTD_CONTEXT_TT_PASS_THROUGH;
+}
+
+static void vtd_switch_address_space(VTDAddressSpace *as)
+{
+ bool use_iommu;
+
+ assert(as);
+
+ use_iommu = as->iommu_state->dmar_enabled;
+ if (use_iommu) {
+ /* Further checks per-device configuration */
+ use_iommu &= !vtd_dev_pt_enabled(as);
+ }
Looks like you can use as->iommu_state->dmar_enabled &&
!vtd_dev_pt_enabled(as)
+
+ trace_vtd_switch_address_space(pci_bus_num(as->bus),
+ VTD_PCI_SLOT(as->devfn),
+ VTD_PCI_FUNC(as->devfn),
+ use_iommu);
+
+ /* Turn off first then on the other */
+ if (use_iommu) {
+ memory_region_set_enabled(&as->sys_alias, false);
+ memory_region_set_enabled(&as->iommu, true);
+ } else {
+ memory_region_set_enabled(&as->iommu, false);
+ memory_region_set_enabled(&as->sys_alias, true);
+ }
+}
+
static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
{
return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
@@ -991,6 +1058,18 @@ static void vtd_do_iommu_translate(VTDAddressSpace
*vtd_as, PCIBus *bus,
cc_entry->context_cache_gen = s->context_cache_gen;
}
+ /*
+ * We don't need to translate for pass-through context entries.
+ * Also, let's ignore IOTLB caching as well for PT devices.
+ */
+ if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) {
+ entry->translated_addr = entry->iova;
+ entry->addr_mask = VTD_PAGE_SIZE - 1;
+ entry->perm = IOMMU_RW;
+ trace_vtd_translate_pt(source_id, entry->iova);
+ return;
+ }
Several questions here:
1) Is this just for vhost?
2) Since this is done after IOTLB querying, do we need flush IOTLB
during address switching?
+
ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level,
&reads, &writes);
if (ret_fr) {
@@ -1135,6 +1214,11 @@ static void
vtd_context_device_invalidate(IntelIOMMUState *s,
VTD_PCI_FUNC(devfn_it));
vtd_as->context_cache_entry.context_cache_gen = 0;
/*
+ * Do switch address space when needed, in case if the
+ * device passthrough bit is switched.
+ */
+ vtd_switch_address_space(vtd_as);
+ /*
* So a device is moving out of (or moving into) a
* domain, a replay() suites here to notify all the
* IOMMU_NOTIFIER_MAP registers about this change.
@@ -1366,25 +1450,6 @@ static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s)
vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS);
}
-static void vtd_switch_address_space(VTDAddressSpace *as)
-{
- assert(as);
-
- trace_vtd_switch_address_space(pci_bus_num(as->bus),
- VTD_PCI_SLOT(as->devfn),
- VTD_PCI_FUNC(as->devfn),
- as->iommu_state->dmar_enabled);
-
- /* Turn off first then on the other */
- if (as->iommu_state->dmar_enabled) {
- memory_region_set_enabled(&as->sys_alias, false);
- memory_region_set_enabled(&as->iommu, true);
- } else {
- memory_region_set_enabled(&as->iommu, false);
- memory_region_set_enabled(&as->sys_alias, true);
- }
-}
-
static void vtd_switch_address_space_all(IntelIOMMUState *s)
{
GHashTableIter iter;
@@ -2849,6 +2914,10 @@ static void vtd_init(IntelIOMMUState *s)
s->ecap |= VTD_ECAP_DT;
}
+ if (x86_iommu->pt_supported) {
+ s->ecap |= VTD_ECAP_PT;
+ }
Since we support migration now, need compat this for pre 2.10.
+
if (s->caching_mode) {
s->cap |= VTD_CAP_CM;
}
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 29d6707..0e73a65 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -187,6 +187,7 @@
/* Interrupt Remapping support */
#define VTD_ECAP_IR (1ULL << 3)
#define VTD_ECAP_EIM (1ULL << 4)
+#define VTD_ECAP_PT (1ULL << 6)
#define VTD_ECAP_MHMV (15ULL << 20)
/* CAP_REG */
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 04a6980..867ad0b 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -38,6 +38,7 @@ vtd_page_walk_skip_perm(uint64_t iova, uint64_t next) "Page walk
skip iova 0x%"P
vtd_page_walk_skip_reserve(uint64_t iova, uint64_t next) "Page walk skip iova 0x%"PRIx64" -
0x%"PRIx64" due to rsrv set"
vtd_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device
%02x:%02x.%x switching address space (iommu enabled=%d)"
vtd_as_unmap_whole(uint8_t bus, uint8_t slot, uint8_t fn, uint64_t iova, uint64_t size)
"Device %02x:%02x.%x start 0x%"PRIx64" size 0x%"PRIx64
+vtd_translate_pt(uint16_t sid, uint64_t addr) "source id 0x%"PRIu16", iova
0x%"PRIx64
# hw/i386/amd_iommu.c
amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr
0x%"PRIx64" + offset 0x%"PRIx32
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
index 02b8825..293caf8 100644
--- a/hw/i386/x86-iommu.c
+++ b/hw/i386/x86-iommu.c
@@ -91,6 +91,7 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
static Property x86_iommu_properties[] = {
DEFINE_PROP_BOOL("intremap", X86IOMMUState, intr_supported, false),
DEFINE_PROP_BOOL("device-iotlb", X86IOMMUState, dt_supported, false),
+ DEFINE_PROP_BOOL("pt", X86IOMMUState, pt_supported, true),
Do you know if AMD IOMMU support this?
Thanks
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h
index 361c07c..ef89c0c 100644
--- a/include/hw/i386/x86-iommu.h
+++ b/include/hw/i386/x86-iommu.h
@@ -74,6 +74,7 @@ struct X86IOMMUState {
SysBusDevice busdev;
bool intr_supported; /* Whether vIOMMU supports IR */
bool dt_supported; /* Whether vIOMMU supports DT */
+ bool pt_supported; /* Whether vIOMMU supports pass-through */
IommuType type; /* IOMMU type - AMD/Intel */
QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */
};