On Fri, Aug 12, 2016 at 11:08 PM, Valentine Sinitsyn < valentine.sinit...@gmail.com> wrote:
> On 11.08.2016 00:42, David Kiarie wrote: > >> Introduce AMD IOMMU interrupt remapping and hook it onto > > >> +static inline int amdvi_ir_pass(MSIMessage *src, MSIMessage *dst, >> uint8_t bit, >> + uint64_t dte) >> > The name is misleading. Actually, this function handles non-vectored > interrupts (either passes or target aborts them). Maybe call it > amdvi_ir_handle_non_vectored() ? > > +{ >> + if ((dte & (1UL << bit))) { >> + /* passing interrupt enabled */ >> + dst->address = src->address; >> + dst->data = src->data; >> + } else { >> + /* should be target aborted */ >> + return -AMDVI_TARGET_ABORT; >> + } >> + return 0; >> +} >> + >> +static int amdvi_remap_ir_intctl(uint64_t dte, uint32_t irte, >> + MSIMessage *src, MSIMessage *dst) >> +{ >> + int ret = 0; >> + >> + switch ((dte >> AMDVI_DTE_INTCTL ) & 3UL) { >> > AMDVI_DTE_INTCTL_SHIFT? Yes, I should have mentioned it in a previous > patch, sorry. Maybe also introduce macros for 3UL and 1, 2, 3 in switch > branches below. > > + case 1: >> + /* pass */ >> + memcpy(dst, src, sizeof(*dst)); >> + break; >> + case 2: >> + /* remap */ >> + if (irte & AMDVI_IRTE_REMAP_MASK) { >> + /* LOCAL APIC address */ >> + dst->address = AMDVI_LOCAL_APIC_ADDR; >> + /* destination mode */ >> + dst->address |= (((irte & AMDVI_IRTE_DM_MASK) >> 6) << >> + AMDVI_MSI_ADDR_DM_RSHIFT); >> + /* RH */ >> + dst->address |= ((irte & AMDVI_IRTE_RQEOI_MASK) >> 5) << >> + AMDVI_MSI_ADDR_RH_RSHIFT; >> + /* Destination ID */ >> + dst->address |= ((irte & AMDVI_IRTE_DEST_MASK) >> 8) << >> + AMDVI_MSI_ADDR_DEST_RSHIFT; >> + /* construct data - vector */ >> + dst->data |= (irte & AMDVI_IRTE_VECTOR_MASK) >> 16; >> + /* Interrupt type */ >> + dst->data |= ((irte & AMDVI_IRTE_INTTYPE_MASK) >> 2) << >> + AMDVI_MSI_DATA_DM_RSHIFT; >> > These bit operations look scary. Did you considered using bitfields or > wrapping them in macros? Will look at introducing macros to cover this comment and the previous one. > > + } else { >> + ret = -AMDVI_TARGET_ABORT; >> + } >> + break; >> + case 0: >> + case 3: >> > In fact, you should report this as event when IR == 1. > > + default: >> + ret = -AMDVI_TARGET_ABORT; >> + } >> + return ret; >> +} >> +/* >> + * We don't support guest virtual APIC so IRTE size will most likely >> always be 4 >> + */ >> +static int amdvi_irte_get(AMDVIState *s, MSIMessage *src, uint32_t *irte, >> + uint64_t *dte, uint16_t devid) >> +{ >> + uint64_t irte_root, offset = devid * AMDVI_DEVTAB_ENTRY_SIZE, >> + irte_size = AMDVI_DEFAULT_IRTE_SIZE, >> + ir_table_size; >> + >> + /* check for GASup and if it's enabled */ >> + if ((amdvi_readq(s, AMDVI_EXT_FEATURES) & AMDVI_GASUP) >> + && (amdvi_readq(s, AMDVI_MMIO_CONTROL) & AMDVI_GAEN)) { >> + /* set a different IRTE size */ >> + irte_size = AMDVI_IRTE_SIZE_GASUP; >> + } >> > As I said, this is likely the only place where we account for Virtual > APIC. You don't seem to handle Virtual APIC Root in DTE, for instance. > Maybe drop this incomplete support altogether, and print some warning here > instead? I'll drop everything and print a warning instead. > > > + if (dma_memory_read(&address_space_memory, s->devtab + offset, dte, >> + AMDVI_DEVTAB_ENTRY_SIZE)) { >> + trace_amdvi_dte_get_fail(s->devtab, offset); >> + return -AMDVI_DEV_TAB_HW; >> + } >> + >> + irte_root = dte[2] & AMDVI_IRTEROOT_MASK; >> + offset = (src->data & AMDVI_IRTE_INDEX_MASK) << 2; >> + ir_table_size = pow(2, dte[2] & AMDVI_IR_TABLE_SIZE_MASK); >> > 1 << dte[2] & AMDVI_IR_TABLE_SIZE_MASK ? > > > + /* enforce IR table size */ >> + if (offset > (ir_table_size * irte_size)) { >> + trace_amdvi_invalid_irte_entry(offset, ir_table_size); >> + return -AMDVI_TARGET_ABORT; >> + } >> + /* read IRTE */ >> + if (dma_memory_read(&address_space_memory, irte_root + offset, >> + irte, sizeof(*irte))) { >> + trace_amdvi_irte_get_fail(irte_root, offset); >> + return -AMDVI_DEV_TAB_HW; >> + } >> + return 0; >> +} >> + >> +static int amdvi_int_remap(X86IOMMUState *iommu, MSIMessage *src, >> + MSIMessage *dst, uint16_t sid) >> +{ >> + trace_amdvi_ir_request(src->data, src->address, sid); >> + >> + AMDVIState *s = AMD_IOMMU_DEVICE(iommu); >> + int ret = 0; >> + uint64_t dte[4]; >> + uint32_t ir_enabled, irte; >> + >> + ret = amdvi_irte_get(s, src, &irte, dte, sid); >> + if (ret < 0) { >> + goto no_remap; >> + } >> + /* interrupt remapping disabled */ >> + if (!(dte[2] & AMDVI_IR_VALID)) { >> + memcpy(dst, src, sizeof(*src)); >> + return ret; >> + } >> + switch (src->data & AMDVI_IR_TYPE_MASK) { >> + case AMDVI_MT_FIXED: >> + case AMDVI_MT_ARBIT: >> + ret = amdvi_remap_ir_intctl(dte[2], irte, src, dst); >> + if (ret < 0) { >> + goto no_remap; >> + } else { >> + s->ir_cache = true; >> + trace_amdvi_ir_remap(dst->data, dst->address, sid); >> + return ret; >> + } >> + /* not handling SMI currently */ >> + case AMDVI_MT_SMI: >> > Maybe also add some warning here so we'd know when to implement SMI Filter. > > > + goto no_remap; >> + case AMDVI_MT_NMI: >> + ir_enabled = AMDVI_DTE_NMIPASS; >> + break; >> + case AMDVI_MT_INIT: >> + ir_enabled = AMDVI_DTE_INTPASS; >> + break; >> + case AMDVI_MT_EXTINT: >> + ir_enabled = AMDVI_DTE_EINTPASS; >> + break; >> + case AMDVI_MT_LINT1: >> + ir_enabled = AMDVI_DTE_LINT1PASS; >> + break; >> + case AMDVI_MT_LINT0: >> + ir_enabled = AMDVI_DTE_LINT0PASS; >> + } >> + >> + ret = amdvi_ir_pass(src, dst, ir_enabled, dte[2]); >> + if (ret < 0){ >> + goto no_remap; >> + } >> + s->ir_cache = true; >> + trace_amdvi_ir_remap(dst->data, dst->address, sid); >> + return ret; >> +no_remap: >> + memcpy(dst, src, sizeof(*src)); >> > Do you need this memcpy()? The original request is target aborted as soon > as this returns. > > + trace_amdvi_ir_target_abort(dst->data, dst->address, sid); >> + return ret; >> +} >> + >> +static MemTxResult amdvi_ir_read(void *opaque, hwaddr addr, >> + uint64_t *data, unsigned size, >> + MemTxAttrs attrs) >> +{ >> + return MEMTX_OK; >> +} >> + >> +static MemTxResult amdvi_ir_write(void *opaque, hwaddr addr, uint64_t >> val, >> + unsigned size, MemTxAttrs attrs) >> +{ >> + AMDVIAddressSpace *as = opaque; >> + MSIMessage from = { addr + AMDVI_INT_ADDR_FIRST, val }, to = { 0, 0}; >> + uint16_t sid = PCI_BUILD_BDFi(as->bus_num, as->devfn); >> > 'BDFi' reads like a typo Right, typo even I am not sure how it got this far. Overall, thanks for review all comments will be covered in next version. > > > + int ret = 0; >> + >> + ret = amdvi_int_remap(X86_IOMMU_DEVICE(as->iommu_state), &from, >> &to, >> + attrs.requester_id); >> + >> + if (ret < 0) { >> + trace_amdvi_ir_target_abort(from.data, from.address, >> + attrs.requester_id); >> + return MEMTX_ERROR; >> + } >> + >> + if(dma_memory_write(&address_space_memory, to.address, &to.data, >> size)) { >> + trace_amdvi_ir_write_fail(to.address, to.data); >> + return MEMTX_ERROR; >> + } >> + >> + return MEMTX_OK; >> +} >> + >> +static const MemoryRegionOps amdvi_ir_ops = { >> + .read_with_attrs = amdvi_ir_read, >> + .write_with_attrs = amdvi_ir_write, >> + .endianness = DEVICE_LITTLE_ENDIAN, >> + .impl = { >> + .min_access_size = 4, >> + .max_access_size = 4, >> + }, >> + .valid = { >> + .min_access_size = 4, >> + .max_access_size = 4, >> + } >> +}; >> + >> static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int >> devfn) >> { >> AMDVIState *s = opaque; >> @@ -1244,6 +1452,12 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus >> *bus, void *opaque, int devfn) >> >> memory_region_init_iommu(&iommu_as[devfn]->iommu, OBJECT(s), >> &s->iommu_ops, "amd-iommu", UINT64_MAX); >> + memory_region_init_io(&iommu_as[devfn]->iommu_ir, OBJECT(s), >> + &amdvi_ir_ops, iommu_as[devfn], >> "amd-iommu-ir", >> + AMDVI_INT_ADDR_SIZE); >> + memory_region_add_subregion(&iommu_as[devfn]->iommu, >> + AMDVI_INT_ADDR_FIRST, >> + &iommu_as[devfn]->iommu_ir); >> address_space_init(&iommu_as[devfn]->as, >> &iommu_as[devfn]->iommu, >> "amd-iommu"); >> } >> @@ -1292,6 +1506,7 @@ static void amdvi_init(AMDVIState *s) >> s->enabled = false; >> s->ats_enabled = false; >> s->cmdbuf_enabled = false; >> + s->ir_cache = false; >> >> /* reset MMIO */ >> memset(s->mmior, 0, AMDVI_MMIO_SIZE); >> @@ -1331,11 +1546,15 @@ static void amdvi_realize(DeviceState *dev, Error >> **err) >> AMDVIState *s = AMD_IOMMU_DEVICE(dev); >> X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); >> PCIBus *bus = PC_MACHINE(qdev_get_machine())->bus; >> + PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); >> s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, >> amdvi_uint64_equal, g_free, g_free); >> >> - /* This device should take care of IOMMU PCI properties */ >> + /* AMD IOMMU has Interrupt Remapping on by default */ >> + x86_iommu->intr_supported = true; >> x86_iommu->type = TYPE_AMD; >> + >> + /* This device should take care of IOMMU PCI properties */ >> qdev_set_parent_bus(DEVICE(&s->pci), &bus->qbus); >> object_property_set_bool(OBJECT(&s->pci), true, "realized", err); >> s->capab_offset = pci_add_capability(&s->pci.dev, >> AMDVI_CAPAB_ID_SEC, 0, >> @@ -1347,9 +1566,13 @@ static void amdvi_realize(DeviceState *dev, Error >> **err) >> memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, >> "amdvi-mmio", >> AMDVI_MMIO_SIZE); >> >> + x86_iommu->ioapic_bdf = PCI_BUILD_BDF(AMDVI_BUS_NUM, >> + AMDVI_DEVFN_IOAPIC); >> + >> sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio); >> sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR); >> pci_setup_iommu(bus, amdvi_host_dma_iommu, s); >> + pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_DEVFN_IOAPIC); >> s->devid = object_property_get_int(OBJECT(&s->pci), "addr", err); >> msi_init(&s->pci.dev, 0, 1, true, false, err); >> amdvi_init(s); >> @@ -1376,6 +1599,7 @@ static void amdvi_class_init(ObjectClass *klass, >> void* data) >> dc->vmsd = &vmstate_amdvi; >> dc->hotpluggable = false; >> dc_class->realize = amdvi_realize; >> + dc_class->int_remap = amdvi_int_remap; >> } >> >> static const TypeInfo amdvi = { >> diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h >> index 2a7f19e..f0e23a8 100644 >> --- a/hw/i386/amd_iommu.h >> +++ b/hw/i386/amd_iommu.h >> @@ -356,6 +356,8 @@ typedef struct AMDVIState { >> uint32_t evtlog_len; /* event log length */ >> uint32_t evtlog_head; /* current IOMMU write position */ >> uint32_t evtlog_tail; /* current Software read position */ >> + /* whether we have remapped any interrupts and hence IR cache */ >> + bool ir_cache; >> >> /* unused for now */ >> hwaddr excl_base; /* base DVA - IOMMU exclusion range */ >> >> > Valentine >