From: Jan Kiszka <jan.kis...@siemens.com> Still a bit hacky, unconditionally enabled (must become opt-in, not available with in-kernel irqchip), not reporting faults properly - but it works! It also revealed a Linux bug [1].
[1] http://thread.gmane.org/gmane.linux.kernel/1766261 Signed-off-by: Rita Sinha <rita.sinh...@gmail.com> --- hw/i386/acpi-build.c | 28 ++++++- hw/i386/intel_iommu.c | 162 ++++++++++++++++++++++++++++++++++++++++- hw/i386/intel_iommu_internal.h | 27 +++++++ hw/intc/apic.c | 1 + hw/pci-host/q35.c | 11 +++ include/hw/acpi/acpi-defs.h | 22 ++++++ include/hw/i386/intel_iommu.h | 7 ++ 7 files changed, 252 insertions(+), 6 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 52c9470..ef43122 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -68,6 +68,9 @@ #define ACPI_BUILD_TABLE_SIZE 0x20000 +#define ACPI_BUILD_IOAPIC_ID 0x0 +#define ACPI_BUILD_HPET_ID 0x0 + /* #define DEBUG_ACPI_BUILD */ #ifdef DEBUG_ACPI_BUILD #define ACPI_BUILD_DPRINTF(fmt, ...) \ @@ -392,7 +395,6 @@ build_madt(GArray *table_data, GArray *linker, AcpiCpuInfo *cpu) io_apic = acpi_data_push(table_data, sizeof *io_apic); io_apic->type = ACPI_APIC_IO; io_apic->length = sizeof(*io_apic); -#define ACPI_BUILD_IOAPIC_ID 0x0 io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID; io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS); io_apic->interrupt = cpu_to_le32(0); @@ -2302,6 +2304,7 @@ build_hpet(GArray *table_data, GArray *linker) */ hpet->timer_block_id = cpu_to_le32(0x8086a201); hpet->addr.address = cpu_to_le64(HPET_BASE); + hpet->hpet_number = ACPI_BUILD_HPET_ID; build_header(linker, table_data, (void *)hpet, "HPET", sizeof(*hpet), 1, NULL, NULL); } @@ -2496,19 +2499,38 @@ build_dmar_q35(GArray *table_data, GArray *linker) AcpiTableDmar *dmar; AcpiDmarHardwareUnit *drhd; + AcpiDmarDeviceScope *dev_scope; dmar = acpi_data_push(table_data, sizeof(*dmar)); dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1; - dmar->flags = 0; /* No intr_remap for now */ + dmar->flags = ACPI_DMAR_INTR_REMAP; /* DMAR Remapping Hardware Unit Definition structure */ drhd = acpi_data_push(table_data, sizeof(*drhd)); drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT); - drhd->length = 
cpu_to_le16(sizeof(*drhd)); /* No device scope now */ + drhd->length = cpu_to_le16(sizeof(*drhd) + (sizeof(*dev_scope) + 2) * 2); drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL; drhd->pci_segment = cpu_to_le16(0); drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR); + /* Device Scope structures for IOAPIC */ + dev_scope = acpi_data_push(table_data, sizeof(*dev_scope) + 2); + dev_scope->type = ACPI_DMAR_SCOPE_TYPE_IOAPIC; + dev_scope->length = sizeof(*dev_scope) + 2; + dev_scope->enumeration_id = ACPI_BUILD_IOAPIC_ID; + dev_scope->start_bus_number = Q35_PSEUDO_BUS_PLATFORM; + dev_scope->path[0] = PCI_SLOT(Q35_PSEUDO_DEVFN_IOAPIC); + dev_scope->path[1] = PCI_FUNC(Q35_PSEUDO_DEVFN_IOAPIC); + + /* Device Scope structures for HPET */ + dev_scope = acpi_data_push(table_data, sizeof(*dev_scope) + 2); + dev_scope->type = ACPI_DMAR_SCOPE_TYPE_HPET; + dev_scope->length = sizeof(*dev_scope) + 2; + dev_scope->enumeration_id = ACPI_BUILD_HPET_ID; + dev_scope->start_bus_number = Q35_PSEUDO_BUS_PLATFORM; + dev_scope->path[0] = PCI_SLOT(Q35_PSEUDO_DEVFN_HPET); + dev_scope->path[1] = PCI_FUNC(Q35_PSEUDO_DEVFN_HPET); + build_header(linker, table_data, (void *)(table_data->data + dmar_start), "DMAR", table_data->len - dmar_start, 1, NULL, NULL); } diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index c371588..2ea642c 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -20,6 +20,7 @@ */ #include "qemu/osdep.h" +#include "hw/i386/apic-msidef.h" #include "hw/sysbus.h" #include "exec/address-spaces.h" #include "intel_iommu_internal.h" @@ -30,10 +31,11 @@ #ifdef DEBUG_INTEL_IOMMU enum { DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG, - DEBUG_CACHE, + DEBUG_CACHE, DEBUG_IR }; #define VTD_DBGBIT(x) (1 << DEBUG_##x) -static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR); +static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR) | + VTD_DBGBIT(IR); #define VTD_DPRINTF(what, fmt, ...) 
do { \ if (vtd_dbgflags & VTD_DBGBIT(what)) { \ @@ -1134,6 +1136,31 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en) } /* Set Root Table Pointer */ +static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s) +{ + VTD_DPRINTF(CSR, "set Interrupt Remap Table Pointer"); + + s->irta = vtd_get_quad_raw(s, DMAR_IRTA_REG); + s->irt_size = 2 << (s->irta & VTD_IRTA_SIZE_MASK); + s->irta &= VTD_IRTA_ADDR_MASK; + /* Ok - report back to driver */ + vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS); +} + +static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en) +{ + VTD_DPRINTF(IR, "Interrupt Remapping Enable %s", (en ? "on" : "off")); + + if (en) { + s->ir_enabled = true; + vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRES); + } else { + s->ir_enabled = false; + vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_IRES, 0); + } +} + +/* Set Root Table Pointer */ static void vtd_handle_gcmd_srtp(IntelIOMMUState *s) { VTD_DPRINTF(CSR, "set Root Table Pointer"); @@ -1182,6 +1209,12 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s) /* Queued Invalidation Enable */ vtd_handle_gcmd_qie(s, val & VTD_GCMD_QIE); } + if (val & VTD_GCMD_SIRTP) { + vtd_handle_gcmd_sirtp(s); + } + if (changed & VTD_GCMD_IRE) { + vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE); + } } /* Handle write to Context Command Register */ @@ -1406,6 +1439,11 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) } break; + case VTD_INV_DESC_INT: + VTD_DPRINTF(INV, "Interrupt Entry Invalidate Descriptor hi 0x%"PRIx64 + " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo); + break; + default: VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type " "hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8, @@ -1762,6 +1800,24 @@ static void vtd_mem_write(void *opaque, hwaddr addr, vtd_handle_ics_write(s); break; + /* Interrupt Remapping Table Address Register, 64-bit */ + case DMAR_IRTA_REG: + VTD_DPRINTF(IR, "DMAR_IRTA_REG write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + 
if (size == 4) { + vtd_set_long(s, addr, val); + } else { + vtd_set_quad(s, addr, val); + } + break; + + case DMAR_IRTA_REG_HI: + VTD_DPRINTF(IR, "DMAR_IRTA_REG_HI write addr 0x%"PRIx64 + ", size %d, val 0x%"PRIx64, addr, size, val); + assert(size == 4); + vtd_set_long(s, addr, val); + break; + /* Invalidation Event Control Register, 32-bit */ case DMAR_IECTL_REG: VTD_DPRINTF(INV, "DMAR_IECTL_REG write addr 0x%"PRIx64 @@ -1858,6 +1914,15 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr, .perm = IOMMU_NONE, }; + if (s->ir_enabled && vtd_is_interrupt_addr(addr)) { + ret.target_as = &vtd_as->int_remap_as; + ret.iova = addr; + ret.translated_addr = addr; + ret.addr_mask = ~(hwaddr)0x3; + ret.perm = IOMMU_WO; + return ret; + } + if (!s->dmar_enabled) { /* DMAR disabled, passthrough, use 4k-page*/ ret.iova = addr & VTD_PAGE_MASK_4K; @@ -1877,6 +1942,93 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr, return ret; } +static int get_int_remap_entry(IntelIOMMUState *s, uint16_t index, + VTDIntRemapEntry *irte) +{ + dma_addr_t addr; + + if (index >= s->irt_size) { + VTD_DPRINTF(IR, "error: IR table index %d out of range", index); + return -1; + } + + addr = s->irta + index * sizeof(*irte); + + if (dma_memory_read(get_dma_address_space(), addr, irte, sizeof(*irte))) { + VTD_DPRINTF(IR, "error: failed to access IR table at 0x%"PRIx64 + " + %"PRIu32, s->irta, index); + return -1;/*-VTD_FR_CONTEXT_TABLE_INV*/; + } + + irte->lo = le64_to_cpu(irte->lo); + irte->hi = le64_to_cpu(irte->hi); + + return 0; +} + +static void vtd_int_remap_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ + uint16_t index = ((addr >> 5) & 0x7fff) | ((addr << 13) & 0x8000); /* addr[19:5] -> index[14:0], addr[2] -> index[15] */ + VTDAddressSpace *vtd_as = opaque; + IntelIOMMUState *s = vtd_as->iommu_state; + VTDIntRemapEntry irte; + uint8_t bus_num = pci_bus_num(vtd_as->bus); + int ret; + + if (!(addr & (1 << 4))) { + printf("compat MSI, blocked\n"); + return; + } + + if (addr & (1 << 3)) 
{ + index += val & 0xffff; + } + ret = get_int_remap_entry(s, index, &irte); + if (ret) { + printf("error\n"); + return; + } + /*printf("IRTE %d: %016lx_%016lx\n", index, irte.raw[1], irte.raw[0]);*/ + + /* + * TODO: + * - proper error reporting, including FPD evaluation + * - check for reserved bits + * - SQ & SVT evaluation + */ + if (!VTD_IRTE_LO_P(irte.lo)) { + printf("IRTE not present\n"); + return; + } + if (vtd_make_source_id(bus_num, vtd_as->devfn) != + VTD_IRTE_HI_SID(irte.hi)) { + printf("SID mismatch\n"); + return; + } + + addr = MSI_ADDR_BASE | + (VTD_IRTE_LO_DEST_MODE(irte.lo) << MSI_ADDR_DEST_MODE_SHIFT) | + (VTD_IRTE_LO_REDIR_HINT(irte.lo) << MSI_ADDR_REDIRECTION_SHIFT) | + (VTD_IRTE_LO_DEST(irte.lo) << MSI_ADDR_DEST_IDX_SHIFT); + val = VTD_IRTE_LO_VECTOR(irte.lo) | + (VTD_IRTE_LO_DELIVERY(irte.lo) << MSI_DATA_DELIVERY_MODE_SHIFT) | + (1 << MSI_DATA_LEVEL_SHIFT) | + (VTD_IRTE_LO_TRIGGER_MODE(irte.lo) << MSI_DATA_TRIGGER_SHIFT); + /*printf("MSI: %08lx:%04lx\n", addr, val);*/ + stl_le_phys(get_dma_address_space(), addr, val); +} + +const MemoryRegionOps vtd_int_remap_ops = { + .write = vtd_int_remap_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + + static const VMStateDescription vtd_vmstate = { .name = "iommu-intel", .unmigratable = 1, @@ -1954,9 +2106,12 @@ static void vtd_init(IntelIOMMUState *s) s->qi_enabled = false; s->iq_last_desc_type = VTD_INV_DESC_NONE; s->next_frcd_reg = 0; + s->irta = 0; + s->irt_size = 0; + s->ir_enabled = false; s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW | VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS; - s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; + s->ecap = VTD_ECAP_QI | VTD_ECAP_IR | VTD_ECAP_IRO | VTD_ECAP_MHMV; vtd_reset_context_cache(s); vtd_reset_iotlb(s); @@ -1993,6 +2148,7 @@ static void vtd_init(IntelIOMMUState *s) vtd_define_quad(s, DMAR_IQT_REG, 0, 0x7fff0ULL, 0); vtd_define_quad(s, DMAR_IQA_REG, 0, 
0xfffffffffffff007ULL, 0); vtd_define_long(s, DMAR_ICS_REG, 0, 0, 0x1UL); + vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff00fULL, 0); vtd_define_long(s, DMAR_IECTL_REG, 0x80000000UL, 0x80000000UL, 0); vtd_define_long(s, DMAR_IEDATA_REG, 0, 0xffffffffUL, 0); vtd_define_long(s, DMAR_IEADDR_REG, 0, 0xfffffffcUL, 0); diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index e5f514c..e0671ee 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -173,8 +173,10 @@ #define VTD_RTADDR_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL) /* ECAP_REG */ +#define VTD_ECAP_MHMV (0xf << 20) /* (offset >> 4) << 8 */ #define VTD_ECAP_IRO (DMAR_IOTLB_REG_OFFSET << 4) +#define VTD_ECAP_IR (1ULL << 3) #define VTD_ECAP_QI (1ULL << 1) /* CAP_REG */ @@ -214,6 +216,11 @@ /* ICS_REG */ #define VTD_ICS_IWC 1UL +/* IRTA_REG */ +#define VTD_IRTA_SIZE_MASK (0xf) +#define VTD_IRTA_EIME (1ULL << 11) +#define VTD_IRTA_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL) + /* IECTL_REG */ #define VTD_IECTL_IM (1UL << 31) #define VTD_IECTL_IP (1UL << 30) @@ -286,6 +293,7 @@ typedef struct VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_TYPE 0xf #define VTD_INV_DESC_CC 0x1 /* Context-cache Invalidate Desc */ #define VTD_INV_DESC_IOTLB 0x2 +#define VTD_INV_DESC_INT 0x4 #define VTD_INV_DESC_WAIT 0x5 /* Invalidation Wait Descriptor */ #define VTD_INV_DESC_NONE 0 /* Not an Invalidate Descriptor */ @@ -388,4 +396,23 @@ typedef struct VTDRootEntry VTDRootEntry; #define VTD_SL_PT_BASE_ADDR_MASK (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK) #define VTD_SL_IGN_COM 0xbff0000000000000ULL +/* Interrupt Remapping Table Entry */ +struct VTDIntRemapEntry { + uint64_t lo; + uint64_t hi; +}; +typedef struct VTDIntRemapEntry VTDIntRemapEntry; + +/* IRTE low word */ +#define VTD_IRTE_LO_P(val) (((val) >> 0) & 0x01) +#define VTD_IRTE_LO_DEST_MODE(val) (((val) >> 2) & 0x01) +#define VTD_IRTE_LO_REDIR_HINT(val) (((val) >> 3) & 0x01) +#define VTD_IRTE_LO_TRIGGER_MODE(val) (((val) >> 4) & 0x01) +#define 
VTD_IRTE_LO_DELIVERY(val) (((val) >> 5) & 0x07) +#define VTD_IRTE_LO_VECTOR(val) (((val) >> 16) & 0xff) +#define VTD_IRTE_LO_DEST(val) (((val) >> 32) & 0xffffffff) + +/* IRTE high word */ +#define VTD_IRTE_HI_SID(val) (((val) >> 0) & 0xffff) + #endif diff --git a/hw/intc/apic.c b/hw/intc/apic.c index 2e99f75..1fb4faa 100644 --- a/hw/intc/apic.c +++ b/hw/intc/apic.c @@ -846,6 +846,7 @@ static void msi_region_write(void *opaque, hwaddr addr, uint64_t data, uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7; /* FIXME: Ignoring redirection hint. */ + /*printf("APIC: %08lx:%04lx\n", addr, data);*/ apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode); } diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 566e3d8..f7adc8e 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -431,6 +431,17 @@ static AddressSpace *q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) assert(0 <= devfn && devfn <= VTD_PCI_DEVFN_MAX); vtd_as = vtd_find_add_as(s, bus, devfn); + + memory_region_init_iommu(&vtd_as->iommu, OBJECT(s), + &s->iommu_ops, "intel_iommu", UINT64_MAX); + address_space_init(&vtd_as->as, + &vtd_as->iommu, "intel_iommu"); + memory_region_init_io(&vtd_as->int_remap_region, OBJECT(s), + &vtd_int_remap_ops, vtd_as, + "intel_int_remap", UINT64_MAX); + address_space_init(&vtd_as->int_remap_as, + &vtd_as->int_remap_region, + "intel_int_remap"); return &vtd_as->as; } diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h index c7a03d4..220d784 100644 --- a/include/hw/acpi/acpi-defs.h +++ b/include/hw/acpi/acpi-defs.h @@ -570,4 +570,26 @@ typedef struct AcpiDmarHardwareUnit AcpiDmarHardwareUnit; /* Masks for Flags field above */ #define ACPI_DMAR_INCLUDE_PCI_ALL 1 +/* DMAR Device Scope structures */ +struct AcpiDmarDeviceScope { + uint8_t type; + uint8_t length; + uint16_t reserved; + uint8_t enumeration_id; + uint8_t start_bus_number; + uint8_t path[0]; +} QEMU_PACKED; +typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope; + 
+/* Values for type in struct AcpiDmarDeviceScope */ +enum { + ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0, + ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1, + ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2, + ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3, + ACPI_DMAR_SCOPE_TYPE_HPET = 4, + ACPI_DMAR_SCOPE_TYPE_ACPI = 5, + ACPI_DMAR_SCOPE_TYPE_RESERVED = 6 /* Reserved for future use */ +}; + #endif diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index b024ffa..75a3627 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -70,6 +70,8 @@ struct VTDAddressSpace { uint8_t devfn; AddressSpace as; MemoryRegion iommu; + AddressSpace int_remap_as; + MemoryRegion int_remap_region; IntelIOMMUState *iommu_state; VTDContextCacheEntry context_cache_entry; }; @@ -109,6 +111,10 @@ struct IntelIOMMUState { bool qi_enabled; /* Set if the QI is enabled */ uint8_t iq_last_desc_type; /* The type of last completed descriptor */ + dma_addr_t irta; + unsigned int irt_size; + bool ir_enabled; + /* The index of the Fault Recording Register to be used next. * Wraps around from N-1 to 0, where N is the number of FRCD_REG. */ @@ -125,6 +131,7 @@ struct IntelIOMMUState { VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */ }; +extern const MemoryRegionOps vtd_int_remap_ops; /* Find the VTD Address space associated with the given bus pointer, * create a new one if none exists */ -- 2.7.2