Re: [Qemu-devel] [PATCH] intel_iommu: fix VTD_SID_TO_BUS
Hi Markus, 2014-10-20 19:41 GMT+08:00 Markus Armbruster : > "Michael S. Tsirkin" writes: > >> (((sid) >> 8) && 0xff) makes no sense >> (((sid) >> 8) & 0xff) seems to be what was meant. >> >> Suggested-by: Markus Armbruster > > Actually by the reporter of https://bugs.launchpad.net/bugs/1382477 > >> Signed-off-by: Michael S. Tsirkin >> --- >> >> Compile-tested only. >> >> include/hw/i386/intel_iommu.h | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h >> index f4701e1..e321ee4 100644 >> --- a/include/hw/i386/intel_iommu.h >> +++ b/include/hw/i386/intel_iommu.h >> @@ -37,7 +37,7 @@ >> #define VTD_PCI_DEVFN_MAX 256 >> #define VTD_PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) >> #define VTD_PCI_FUNC(devfn) ((devfn) & 0x07) >> -#define VTD_SID_TO_BUS(sid) (((sid) >> 8) && 0xff) >> +#define VTD_SID_TO_BUS(sid) (((sid) >> 8) & 0xff) >> #define VTD_SID_TO_DEVFN(sid) ((sid) & 0xff) >> >> #define DMAR_REG_SIZE 0x230 > > Can't find the spec right now, but it looks plausible enough. Yes, this is a typo. I am sorry that I introduced such a mistake. The spec is here in Section 3.4 : http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html VTD_SID_TO_BUS(sid) is intended to be used to get the bus id from the source identifier. Thanks very much! Regards, Le > Only use is in vtd_context_device_invalidate(). Bug's impact isn't > obvious to me. > > Reviewed-by: Markus Armbruster
[Qemu-devel] [PATCH 2/3] intel-iommu: add DMAR table to ACPI tables
Expose Intel IOMMU to the BIOS. If object of TYPE_INTEL_IOMMU_DEVICE exists, add DMAR table to ACPI RSDT table. For now the DMAR table indicates that there is only one hardware unit without INTR_REMAP capability on the platform. Signed-off-by: Le Tan --- hw/i386/acpi-build.c | 41 ++ hw/i386/acpi-defs.h | 70 2 files changed, 111 insertions(+) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index ebc5f03..8241621 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -45,6 +45,7 @@ #include "hw/i386/ich9.h" #include "hw/pci/pci_bus.h" #include "hw/pci-host/q35.h" +#include "hw/i386/intel_iommu.h" #include "hw/i386/q35-acpi-dsdt.hex" #include "hw/i386/acpi-dsdt.hex" @@ -1316,6 +1317,31 @@ build_mcfg_q35(GArray *table_data, GArray *linker, AcpiMcfgInfo *info) } static void +build_dmar_q35(GArray *table_data, GArray *linker) +{ +int dmar_start = table_data->len; + +AcpiTableDmar *dmar; +AcpiDmarHardwareUnit *drhd; + +dmar = acpi_data_push(table_data, sizeof(*dmar)); +dmar->host_address_width = 0x26;/* 0x26 + 1 = 39 */ +dmar->flags = 0;/* No intr_remap for now */ + +/* DMAR Remapping Hardware Unit Definition structure */ +drhd = acpi_data_push(table_data, sizeof(*drhd)); +drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT); +drhd->length = cpu_to_le16(sizeof(*drhd)); /* No device scope now */ +drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL; +drhd->pci_segment = cpu_to_le16(0); +drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR); + +build_header(linker, table_data, (void *)(table_data->data + dmar_start), + "DMAR", table_data->len - dmar_start, 1); +} + + +static void build_dsdt(GArray *table_data, GArray *linker, AcpiMiscInfo *misc) { AcpiTableHeader *dsdt; @@ -1436,6 +1462,17 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg) return true; } +static bool acpi_has_iommu(void) +{ +bool ambiguous; +Object *intel_iommu; + +intel_iommu = object_resolve_path_type("", TYPE_INTEL_IOMMU_DEVICE, + &ambiguous); +return intel_iommu && !ambiguous; +} + + static void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables) { @@ -1497,6 +1534,10 @@ void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables) acpi_add_table(table_offsets, tables->table_data); build_mcfg_q35(tables->table_data, tables->linker, &mcfg); } +if (acpi_has_iommu()) { +acpi_add_table(table_offsets, tables->table_data); +build_dmar_q35(tables->table_data, tables->linker); +} /* Add tables supplied by user (if any) */ for (u = acpi_table_first(); u; u = acpi_table_next(u)) { diff --git a/hw/i386/acpi-defs.h b/hw/i386/acpi-defs.h index e93babb..9674825 100644 --- a/hw/i386/acpi-defs.h +++ b/hw/i386/acpi-defs.h @@ -314,4 +314,74 @@ struct AcpiTableMcfg { } QEMU_PACKED; typedef struct AcpiTableMcfg AcpiTableMcfg; +/* DMAR - DMA Remapping table r2.2 */ +struct AcpiTableDmar { +ACPI_TABLE_HEADER_DEF +uint8_t host_address_width; /* Maximum DMA physical addressability */ +uint8_t flags; +uint8_t reserved[10]; +} QEMU_PACKED; +typedef struct AcpiTableDmar AcpiTableDmar; + +/* Masks for Flags field above */ +#define ACPI_DMAR_INTR_REMAP(1) +#define ACPI_DMAR_X2APIC_OPT_OUT(2) + +/* + * DMAR sub-structures (Follow DMA Remapping table) + */ +#define ACPI_DMAR_SUB_HEADER_DEF /* Common ACPI DMAR sub-structure header */\ +uint16_t type; \ +uint16_t length; + +/* Values for sub-structure type for DMAR */ +enum { +ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, /* DRHD */ +ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, /* RMRR */ +ACPI_DMAR_TYPE_ATSR = 2,/* ATSR */ +ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3, /* RHSR */ +ACPI_DMAR_TYPE_ANDD = 4,/* ANDD */ +ACPI_DMAR_TYPE_RESERVED = 5 /* Reserved for furture use */ +}; + +/* + * Sub-structures for DMAR, correspond to Type in ACPI_DMAR_SUB_HEADER_DEF + */ + +/* DMAR Device Scope structures */ +struct AcpiDmarDeviceScope { +uint8_t type; +uint8_t length; +uint16_t reserved; +uint8_t enumeration_id; +uint8_t start_bus_number; +uint8_t path[0]; +} QEMU_PACKED; +typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope; + +/* Values for type in struct AcpiDmarDeviceScope */ +enum { +ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0, +ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1, +ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2, +ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3, +ACPI_DMAR_SCOPE_TYPE_HPET = 4, +ACPI_DMAR_SCOPE_TYPE_ACPI = 5, +ACPI_DMAR_SCOPE_TYPE_RESERVED = 6 /* Reserved for future use */ +}; + +/* 0: Hardware Unit Definition */ +struct AcpiDmarHardwareUnit { +ACPI_DMAR_SUB_HEADER_DEF +uin
[Qemu-devel] [PATCH 1/3] intel-iommu: introduce Intel IOMMU (VT-d) emulation
Add support for emulating Intel IOMMU according to the VT-d specification for the q35 chipset machine. Implement the logic for DMAR (DMA remapping) without PASID support. Use register-based invalidation for context-cache invalidation and IOTLB invalidation. Basic fault reporting and caching are not implemented yet. Signed-off-by: Le Tan --- hw/i386/Makefile.objs |1 + hw/i386/intel_iommu.c | 1139 + include/hw/i386/intel_iommu.h | 350 + 3 files changed, 1490 insertions(+) create mode 100644 hw/i386/intel_iommu.c create mode 100644 include/hw/i386/intel_iommu.h diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs index 48014ab..6936111 100644 --- a/hw/i386/Makefile.objs +++ b/hw/i386/Makefile.objs @@ -2,6 +2,7 @@ obj-$(CONFIG_KVM) += kvm/ obj-y += multiboot.o smbios.o obj-y += pc.o pc_piix.o pc_q35.o obj-y += pc_sysfw.o +obj-y += intel_iommu.o obj-$(CONFIG_XEN) += ../xenpv/ xen/ obj-y += kvmvapic.o diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c new file mode 100644 index 000..3ba0e1e --- /dev/null +++ b/hw/i386/intel_iommu.c @@ -0,0 +1,1139 @@ +/* + * QEMU emulation of an Intel IOMMU (VT-d) + * (DMA Remapping device) + * + * Copyright (c) 2013 Knut Omang, Oracle + * Copyright (C) 2014 Le Tan, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "hw/sysbus.h" +#include "exec/address-spaces.h" +#include "hw/i386/intel_iommu.h" + +/* #define DEBUG_INTEL_IOMMU */ +#ifdef DEBUG_INTEL_IOMMU +#define D(fmt, ...) \ +do { fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \ + ## __VA_ARGS__); } while (0) +#else +#define D(fmt, ...) \ +do { } while (0) +#endif + + +static inline void define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val, +uint64_t wmask, uint64_t w1cmask) +{ +*((uint64_t *)&s->csr[addr]) = val; +*((uint64_t *)&s->wmask[addr]) = wmask; +*((uint64_t *)&s->w1cmask[addr]) = w1cmask; +} + +static inline void define_quad_wo(IntelIOMMUState *s, hwaddr addr, + uint64_t mask) +{ +*((uint64_t *)&s->womask[addr]) = mask; +} + +static inline void define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val, +uint32_t wmask, uint32_t w1cmask) +{ +*((uint32_t *)&s->csr[addr]) = val; +*((uint32_t *)&s->wmask[addr]) = wmask; +*((uint32_t *)&s->w1cmask[addr]) = w1cmask; +} + +static inline void define_long_wo(IntelIOMMUState *s, hwaddr addr, + uint32_t mask) +{ +*((uint32_t *)&s->womask[addr]) = mask; +} + +/* "External" get/set operations */ +static inline void set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val) +{ +uint64_t oldval = *((uint64_t *)&s->csr[addr]); +uint64_t wmask = *((uint64_t *)&s->wmask[addr]); +uint64_t w1cmask = *((uint64_t *)&s->w1cmask[addr]); +*((uint64_t *)&s->csr[addr]) = +((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val); +} + +static inline void set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val) +{ +uint32_t oldval = *((uint32_t *)&s->csr[addr]); +uint32_t wmask = *((uint32_t *)&s->wmask[addr]); +uint32_t w1cmask = *((uint32_t *)&s->w1cmask[addr]); +*((uint32_t *)&s->csr[addr]) = +((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val); +} + +static inline uint64_t get_quad(IntelIOMMUState *s, hwaddr addr) +{ +uint64_t val = *((uint64_t *)&s->csr[addr]); +uint64_t womask = *((uint64_t *)&s->womask[addr]); +return val & ~womask; +} + + +static inline uint32_t get_long(IntelIOMMUState *s, hwaddr addr) +{ +uint32_t val = *((uint32_t *)&s->csr[addr]); +uint32_t womask = *((uint32_t *)&s->womask[addr]); +return val & ~womask; +} + + + +/* "Internal" get/set operations */ +static inline uint64_t __get_quad(IntelIOMMUState *s, hwaddr addr) +{ +return *((uint64_t *)&s->csr[addr]); +} + +static inline uint32_t __get_long(IntelIOMMUState *s, hwaddr addr) +{ +return *((uint32_t *)&s->csr[ad
[Qemu-devel] [PATCH 0/3] intel-iommu: introduce Intel IOMMU (VT-d) emulation to q35 chipset
Hi, These patches are intended to introduce Intel IOMMU (VT-d) emulation to q35 chipset. The major job in these patches is to add support for emulating Intel IOMMU according to the VT-d specification, including basic responses to CSRs accesses, the logic of DMAR (DMA remapping) and DMA memory address translations. Features implemented for now are: 1. Response to important CSRs accesses; 2. DMAR (DMA remapping) without PASID support; 3. Use register-based invalidation for IOTLB and context cache invalidation; 4. Add DMAR table to ACPI tables to expose VT-d to BIOS; 5. Add "-machine vtd=on|off" option to enable/disable VT-d; 6. Only one DMAR unit for all the devices of PCI Segment 0. Testing: 1. L1 guest with Linux with intel_iommu=on can interact with VT-d and boot smoothly, and I can see info about VT-d in log of kernel; 2. Run L1 with VT-d, L2 guest with Linux can boot smoothly withou PCI device passthrough; 3. Run L1 with VT-d and "-soundhw ac97 (QEMU_AUDIO_DRV=none)", then assign the sound card to L2; L2 can boot smoothly with legacy PCI assignment; 4. Jailhouse hypervisor seems to run smoothly for now (tested by Jan). 5. Run L1 with VT-d and e1000 network card, then assign e1000 to L2; L2 will be STUCK when booting. This still remains unsolved now. As far as I know, I suppose that the L2 crashes when doing e1000_probe(). The QEMU of L1 will dump something with "KVM: entry failed, hardware error 0x0", and the KVM of host will print "nested_vmx_exit_handled failed vm entry 7". Unlike assigning the sound card, after being assigned to L2, there is no translation entry of e1000 through VT-d, which I think means that e1000 doesn't issue any DMA access during the boot of L2. Sometimes the kernel of L2 will print "divide error" during booting. Can someone help me with this? Any help is appreciated! :) 6. VFIO has not been tested yet and I will do this soon. I have some questions want to consult here: 1. Now the struct IntelIOMMUState is a member of MCHPCIState. VT-d is registered as TYPE_SYS_BUS_DEVICE but registers its configuration MemoryRegion as subregion of mch->pci_address_space. Is this correct? Another thought comes to my mind is using sysbus_mmio_map() to map the MemoryRegion of VT-d. But I am not sure. And maybe there are more improper usage of the QOM. 2. For declaration of porinter of pointer, like VTDAddressSpace **address_spaces, checkpatch.pl will warn that "ERROR: need consistent spacing around '*' (ctx:WxO)". Is checkpatch.pl wrong? Then what is the proper declaration? TODO: 1. Fix the bug of legacy PCI assignment; 2. Test VFIO; 3. Queued Invalidation; 4. Basic fault reporting; 5. Caching propertities of IOTLB; 6. Clear up codes related to migration. Thanks very much! Git trees: https://github.com/tamlok/qemu/commits/q35-iommu-v2 Le Tan (3): intel-iommu: introduce Intel IOMMU (VT-d) emulation intel-iommu: add DMAR table to ACPI tables intel-iommu: add Intel IOMMU emulation to q35 and add a machine option "vtd" as a switch hw/core/machine.c | 27 +- hw/i386/Makefile.objs |1 + hw/i386/acpi-build.c | 41 ++ hw/i386/acpi-defs.h | 70 +++ hw/i386/intel_iommu.c | 1139 + hw/pci-host/q35.c | 72 ++- include/hw/boards.h |1 + include/hw/i386/intel_iommu.h | 350 + include/hw/pci-host/q35.h |2 + qemu-options.hx |5 +- vl.c |4 + 11 files changed, 1703 insertions(+), 9 deletions(-) create mode 100644 hw/i386/intel_iommu.c create mode 100644 include/hw/i386/intel_iommu.h -- 1.9.1
[Qemu-devel] [PATCH 3/3] intel-iommu: add Intel IOMMU emulation to q35 and add a machine option "vtd" as a switch
Add Intel IOMMU emulation to q35 chipset and expose it to the guest. 1. Add a machine option. Users can use "-machine vtd=on|off" in the command line to enable/disable Intel IOMMU. The default is off. 2. Accroding to the machine option, q35 will initialize the Intel IOMMU and use pci_setup_iommu() to setup q35_host_dma_iommu() as the IOMMU function for the pci bus. 3. q35_host_dma_iommu() will return different address space according to the bus_num and devfn of the device. Signed-off-by: Le Tan --- hw/core/machine.c | 27 -- hw/pci-host/q35.c | 72 +++ include/hw/boards.h | 1 + include/hw/pci-host/q35.h | 2 ++ qemu-options.hx | 5 +++- vl.c | 4 +++ 6 files changed, 102 insertions(+), 9 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index cbba679..1be9ef2 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -235,6 +235,20 @@ static void machine_set_firmware(Object *obj, const char *value, Error **errp) ms->firmware = g_strdup(value); } +static bool machine_get_vtd(Object *obj, Error **errp) +{ +MachineState *ms = MACHINE(obj); + +return ms->vtd; +} + +static void machine_set_vtd(Object *obj, bool value, Error **errp) +{ +MachineState *ms = MACHINE(obj); + +ms->vtd = value; +} + static void machine_initfn(Object *obj) { object_property_add_str(obj, "accel", @@ -270,10 +284,17 @@ static void machine_initfn(Object *obj) machine_set_dump_guest_core, NULL); object_property_add_bool(obj, "mem-merge", - machine_get_mem_merge, machine_set_mem_merge, NULL); -object_property_add_bool(obj, "usb", machine_get_usb, machine_set_usb, NULL); + machine_get_mem_merge, + machine_set_mem_merge, NULL); +object_property_add_bool(obj, "usb", + machine_get_usb, + machine_set_usb, NULL); object_property_add_str(obj, "firmware", -machine_get_firmware, machine_set_firmware, NULL); +machine_get_firmware, +machine_set_firmware, NULL); +object_property_add_bool(obj, "vtd", + machine_get_vtd, + machine_set_vtd, NULL); } static void machine_finalize(Object *obj) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index a0a3068..933f76e 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -347,6 +347,61 @@ static void mch_reset(DeviceState *qdev) mch_update(mch); } +static AddressSpace *q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) +{ +IntelIOMMUState *s = opaque; +VTDAddressSpace **pvtd_as; +VTDAddressSpace *vtd_as; +int bus_num = pci_bus_num(bus); + +assert(devfn >= 0); + +pvtd_as = s->address_spaces[bus_num]; +if (!pvtd_as) { +/* No corresponding free() */ +pvtd_as = g_malloc0(sizeof(VTDAddressSpace *) * +VTD_PCI_SLOT_MAX * VTD_PCI_FUNC_MAX); +s->address_spaces[bus_num] = pvtd_as; +} + +vtd_as = *(pvtd_as + devfn); +if (!vtd_as) { +vtd_as = g_malloc0(sizeof(*vtd_as)); +*(pvtd_as + devfn) = vtd_as; + +vtd_as->bus_num = bus_num; +vtd_as->devfn = devfn; +vtd_as->iommu_state = s; +memory_region_init_iommu(&vtd_as->iommu, OBJECT(s), &s->iommu_ops, + "intel_iommu", UINT64_MAX); +address_space_init(&vtd_as->as, &vtd_as->iommu, "intel_iommu"); +} + +return &vtd_as->as; +} + +static void mch_init_dmar(MCHPCIState *mch) +{ +Error *error = NULL; +PCIBus *pci_bus = PCI_BUS(qdev_get_parent_bus(DEVICE(mch))); + +mch->iommu = INTEL_IOMMU_DEVICE(object_new(TYPE_INTEL_IOMMU_DEVICE)); +qdev_set_parent_bus(DEVICE(mch->iommu), sysbus_get_default()); +object_property_set_bool(OBJECT(mch->iommu), true, "realized", &error); + +if (error) { +fprintf(stderr, "%s\n", error_get_pretty(error)); +error_free(error); +return; +} + +memory_region_add_subregion(mch->pci_address_space, +Q35_HOST_BRIDGE_IOMMU_ADDR, +&mch->iommu->csrmem); +pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu); +} + + static int mch_init(PCIDevice *d) { int i; @@ -363,13 +418,20 @@ static int mch_init(PCIDevice *d) memory_region_add_subregion_overlap(mch->system_memory, 0xa, &mch->smram_region, 1); memory_region_set_enabled(&mch->smram_r
Re: [Qemu-devel] [PATCH 1/3] intel-iommu: introduce Intel IOMMU (VT-d) emulation
Hi Michael, Thanks very much for your careful reviewing! 2014-07-23 4:05 GMT+08:00 Michael S. Tsirkin : > On Tue, Jul 22, 2014 at 11:47:48PM +0800, Le Tan wrote: >> Add support for emulating Intel IOMMU according to the VT-d specification for >> the q35 chipset machine. Implement the logic for DMAR (DMA remapping) without >> PASID support. Use register-based invalidation for context-cache invalidation >> and IOTLB invalidation. >> Basic fault reporting and caching are not implemented yet. >> >> Signed-off-by: Le Tan >> --- >> hw/i386/Makefile.objs |1 + >> hw/i386/intel_iommu.c | 1139 >> + >> include/hw/i386/intel_iommu.h | 350 + >> 3 files changed, 1490 insertions(+) >> create mode 100644 hw/i386/intel_iommu.c >> create mode 100644 include/hw/i386/intel_iommu.h >> >> diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs >> index 48014ab..6936111 100644 >> --- a/hw/i386/Makefile.objs >> +++ b/hw/i386/Makefile.objs >> @@ -2,6 +2,7 @@ obj-$(CONFIG_KVM) += kvm/ >> obj-y += multiboot.o smbios.o >> obj-y += pc.o pc_piix.o pc_q35.o >> obj-y += pc_sysfw.o >> +obj-y += intel_iommu.o >> obj-$(CONFIG_XEN) += ../xenpv/ xen/ >> >> obj-y += kvmvapic.o >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >> new file mode 100644 >> index 000..3ba0e1e >> --- /dev/null >> +++ b/hw/i386/intel_iommu.c >> @@ -0,0 +1,1139 @@ >> +/* >> + * QEMU emulation of an Intel IOMMU (VT-d) >> + * (DMA Remapping device) >> + * >> + * Copyright (c) 2013 Knut Omang, Oracle >> + * Copyright (C) 2014 Le Tan, >> + * >> + * This program is free software; you can redistribute it and/or modify >> + * it under the terms of the GNU General Public License as published by >> + * the Free Software Foundation; either version 2 of the License, or >> + * (at your option) any later version. >> + >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. >> + >> + * You should have received a copy of the GNU General Public License >> + * along with this program; if not, write to the Free Software >> + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 >> USA >> + */ >> + >> +#include "hw/sysbus.h" >> +#include "exec/address-spaces.h" >> +#include "hw/i386/intel_iommu.h" >> + >> +/* #define DEBUG_INTEL_IOMMU */ >> +#ifdef DEBUG_INTEL_IOMMU >> +#define D(fmt, ...) \ >> +do { fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \ >> + ## __VA_ARGS__); } while (0) >> +#else >> +#define D(fmt, ...) \ >> +do { } while (0) >> +#endif >> + > > > Way too short for a macro name, might conflict with some > header you include. > You are polluting the global namespace. > Best to prefix everything by INTEL_IOMMU_ and intel_iommu_. Get it. Maybe I will use VTD as the prefix for this. > >> + >> +static inline void define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t >> val, >> +uint64_t wmask, uint64_t w1cmask) >> +{ >> +*((uint64_t *)&s->csr[addr]) = val; >> +*((uint64_t *)&s->wmask[addr]) = wmask; >> +*((uint64_t *)&s->w1cmask[addr]) = w1cmask; >> +} >> + >> +static inline void define_quad_wo(IntelIOMMUState *s, hwaddr addr, >> + uint64_t mask) >> +{ >> +*((uint64_t *)&s->womask[addr]) = mask; >> +} >> + >> +static inline void define_long(IntelIOMMUState *s, hwaddr addr, uint32_t >> val, >> +uint32_t wmask, uint32_t w1cmask) >> +{ >> +*((uint32_t *)&s->csr[addr]) = val; >> +*((uint32_t *)&s->wmask[addr]) = wmask; >> +*((uint32_t *)&s->w1cmask[addr]) = w1cmask; >> +} >> + >> +static inline void define_long_wo(IntelIOMMUState *s, hwaddr addr, >> + uint32_t mask) >> +{ >> +*((uint32_t *)&s->womask[addr]) = mask; >> +} >> + >> +/* "External" get/set operations */ >> +static inline void set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val) >> +{ >> +uint64_t oldval = *((uint64_t *)&s->csr[addr]); >>
Re: [Qemu-devel] [PATCH 1/3] intel-iommu: introduce Intel IOMMU (VT-d) emulation
Hi Paolo, 2014-07-23 15:58 GMT+08:00 Paolo Bonzini : > Il 22/07/2014 17:47, Le Tan ha scritto: >> +static inline void define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t >> val, >> +uint64_t wmask, uint64_t w1cmask) >> +{ >> +*((uint64_t *)&s->csr[addr]) = val; > > All these casts are not endian-safe. Please use ldl_le_p, ldq_le_p, > stl_le_p, stq_le_p. Thanks very much. Finally I got the idea here.:) Also thanks for your renaming suggestions. >> +*((uint64_t *)&s->wmask[addr]) = wmask; >> +*((uint64_t *)&s->w1cmask[addr]) = w1cmask; >> +} >> + >> +static inline void define_quad_wo(IntelIOMMUState *s, hwaddr addr, >> + uint64_t mask) >> +{ >> +*((uint64_t *)&s->womask[addr]) = mask; >> +} >> + >> +static inline void define_long(IntelIOMMUState *s, hwaddr addr, uint32_t >> val, >> +uint32_t wmask, uint32_t w1cmask) >> +{ >> +*((uint32_t *)&s->csr[addr]) = val; >> +*((uint32_t *)&s->wmask[addr]) = wmask; >> +*((uint32_t *)&s->w1cmask[addr]) = w1cmask; >> +} >> + >> +static inline void define_long_wo(IntelIOMMUState *s, hwaddr addr, >> + uint32_t mask) >> +{ >> +*((uint32_t *)&s->womask[addr]) = mask; >> +} >> + >> +/* "External" get/set operations */ >> +static inline void set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val) >> +{ >> +uint64_t oldval = *((uint64_t *)&s->csr[addr]); >> +uint64_t wmask = *((uint64_t *)&s->wmask[addr]); >> +uint64_t w1cmask = *((uint64_t *)&s->w1cmask[addr]); >> +*((uint64_t *)&s->csr[addr]) = >> +((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val); >> +} >> + >> +static inline void set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val) >> +{ >> +uint32_t oldval = *((uint32_t *)&s->csr[addr]); >> +uint32_t wmask = *((uint32_t *)&s->wmask[addr]); >> +uint32_t w1cmask = *((uint32_t *)&s->w1cmask[addr]); >> +*((uint32_t *)&s->csr[addr]) = >> +((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val); >> +} >> + >> +static inline uint64_t get_quad(IntelIOMMUState *s, hwaddr addr) >> +{ >> +uint64_t val = *((uint64_t *)&s->csr[addr]); >> +uint64_t womask = *((uint64_t *)&s->womask[addr]); >> +return val & ~womask; >> +} >> + >> + >> +static inline uint32_t get_long(IntelIOMMUState *s, hwaddr addr) >> +{ >> +uint32_t val = *((uint32_t *)&s->csr[addr]); >> +uint32_t womask = *((uint32_t *)&s->womask[addr]); >> +return val & ~womask; >> +} >> + >> + >> + >> +/* "Internal" get/set operations */ >> +static inline uint64_t __get_quad(IntelIOMMUState *s, hwaddr addr) > > get_quad_raw? > >> +{ >> +return *((uint64_t *)&s->csr[addr]); >> +} >> + >> +static inline uint32_t __get_long(IntelIOMMUState *s, hwaddr addr) > > get_long_raw? > >> +{ >> +return *((uint32_t *)&s->csr[addr]); >> +} >> + >> + >> +/* val = (val & ~clear) | mask */ >> +static inline uint32_t set_mask_long(IntelIOMMUState *s, hwaddr addr, > > set_clear_long? > >> + uint32_t clear, uint32_t mask) >> +{ >> +uint32_t *ptr = (uint32_t *)&s->csr[addr]; >> +uint32_t val = (*ptr & ~clear) | mask; >> +*ptr = val; >> +return val; >> +} >> + >> +/* val = (val & ~clear) | mask */ >> +static inline uint64_t set_mask_quad(IntelIOMMUState *s, hwaddr addr, > > set_clear_quad? >> + uint64_t clear, uint64_t mask) >> +{ >> +uint64_t *ptr = (uint64_t *)&s->csr[addr]; >> +uint64_t val = (*ptr & ~clear) | mask; >> +*ptr = val; >> +return val; >> +} >> + >> + > Regards, Le
Re: [Qemu-devel] [PATCH 1/3] intel-iommu: introduce Intel IOMMU (VT-d) emulation
Hi Stefan, 2014-07-24 4:29 GMT+08:00 Stefan Weil : > Am 22.07.2014 17:47, schrieb Le Tan: >> Add support for emulating Intel IOMMU according to the VT-d specification for >> the q35 chipset machine. Implement the logic for DMAR (DMA remapping) without >> PASID support. Use register-based invalidation for context-cache invalidation >> and IOTLB invalidation. >> Basic fault reporting and caching are not implemented yet. >> >> Signed-off-by: Le Tan >> --- >> hw/i386/Makefile.objs |1 + >> hw/i386/intel_iommu.c | 1139 >> + >> include/hw/i386/intel_iommu.h | 350 + >> 3 files changed, 1490 insertions(+) >> create mode 100644 hw/i386/intel_iommu.c >> create mode 100644 include/hw/i386/intel_iommu.h >> > [...] >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >> new file mode 100644 >> index 000..3ba0e1e >> --- /dev/null >> +++ b/hw/i386/intel_iommu.c >> @@ -0,0 +1,1139 @@ >> +/* >> + * QEMU emulation of an Intel IOMMU (VT-d) >> + * (DMA Remapping device) >> + * >> + * Copyright (c) 2013 Knut Omang, Oracle >> + * Copyright (C) 2014 Le Tan, >> + * >> + * This program is free software; you can redistribute it and/or modify >> + * it under the terms of the GNU General Public License as published by >> + * the Free Software Foundation; either version 2 of the License, or >> + * (at your option) any later version. >> + >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. >> + >> + * You should have received a copy of the GNU General Public License >> + * along with this program; if not, write to the Free Software >> + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 >> USA >> + */ >> + > > I suggest replacing the FSF address here (and in other files) by the URL: > > * You should have received a copy of the GNU General Public License along > * with this program; if not, see <http://www.gnu.org/licenses/>. > > This is the standard used for most GPL text in QEMU source files. Get it. I copied it from the Linux kernel tree. Thanks very much! > Regards > Stefan W. > Regards, Le
[Qemu-devel] [PATCH v2 0/3] intel-iommu: introduce Intel IOMMU (VT-d) emulation to q35 chipset
Hi, These patches are intended to introduce Intel IOMMU (VT-d) emulation to q35 chipset. The major job in these patches is to add support for emulating Intel IOMMU according to the VT-d specification, including basic responses to CSRs accesses, the logic of DMAR (DMA remapping) and DMA memory address translations. Features implemented for now are: 1. Response to important CSRs accesses; 2. DMAR (DMA remapping) without PASID support; 3. Use register-based invalidation for IOTLB and context cache invalidation; 4. Add DMAR table to ACPI tables to expose VT-d to BIOS; 5. Add "-machine iommu=on|off" option to enable/disable VT-d; 6. Only one DMAR unit for all the devices of PCI Segment 0. Testing: 1. L1 guest with Linux with intel_iommu=on can interact with VT-d and boot smoothly, and I can see info about VT-d in log of kernel; 2. Run L1 with VT-d, L2 guest with Linux can boot smoothly withou PCI device passthrough; 3. Run L1 with VT-d and "-soundhw ac97 (QEMU_AUDIO_DRV=none)", then assign the sound card to L2; L2 can boot smoothly with legacy PCI assignment; 4. Jailhouse hypervisor seems to run smoothly for now (tested by Jan). 5. Run L1 with VT-d and e1000 network card, then assign e1000 to L2; L2 will be STUCK when booting. This still remains unsolved now. As far as I know, I suppose that the L2 crashes when doing e1000_probe(). The QEMU of L1 will dump something with "KVM: entry failed, hardware error 0x0", and the KVM of host will print "nested_vmx_exit_handled failed vm entry 7". Unlike assigning the sound card, after being assigned to L2, there is no translation entry of e1000 through VT-d, which I think means that e1000 doesn't issue any DMA access during the boot of L2. Sometimes the kernel of L2 will print "divide error" during booting. Can someone help me with this? Any help is appreciated! :) 6. VFIO is tested and is the same as legacy pci assignment. I have some questions want to consult here: 1. Now the struct IntelIOMMUState is a member of MCHPCIState. VT-d is registered as TYPE_SYS_BUS_DEVICE but registers its configuration MemoryRegion as subregion of mch->pci_address_space. Is this correct? Another thought comes to my mind is using sysbus_mmio_map() to map the MemoryRegion of VT-d. But I am not sure. And maybe there are more improper usage of the QOM. 2. For declaration of porinter of pointer, like VTDAddressSpace **address_spaces, checkpatch.pl will warn that "ERROR: need consistent spacing around '*' (ctx:WxO)". Is checkpatch.pl wrong? TODO: 1. Fix the bug of legacy PCI assignment; 2. Clear up codes related to migration. 3. Queued Invalidation; 4. Basic fault reporting; 5. Caching propertities of IOTLB; Changes since v1: *address reviewing suggestions given by Michael, Paolo, Stefan and Jan -split intel_iommu.h to include/hw/i386/intel_iommu.h and hw/i386/intel_iommu_internal.h -change the copyright information -change D() to VTD_DPRINTF() -remove dead code -rename constant definitions with consistent prefix VTD_ -rename some struct definitions according to QEMU standard -rename some CSRs access functions -use endian-save functions to access CSRs -change machine option to "iommu=on|off" Thanks very much! Git trees: https://github.com/tamlok/qemu Le Tan (3): intel-iommu: introduce Intel IOMMU (VT-d) emulation intel-iommu: add DMAR table to ACPI tables intel-iommu: add Intel IOMMU emulation to q35 and add a machine option "iommu" as a switch hw/core/machine.c | 27 +- hw/i386/Makefile.objs | 1 + hw/i386/acpi-build.c | 41 ++ hw/i386/acpi-defs.h| 70 hw/i386/intel_iommu.c | 911 + hw/i386/intel_iommu_internal.h | 257 hw/pci-host/q35.c | 72 +++- include/hw/boards.h| 1 + include/hw/i386/intel_iommu.h | 75 include/hw/pci-host/q35.h | 2 + qemu-options.hx| 5 +- vl.c | 4 + 12 files changed, 1457 insertions(+), 9 deletions(-) create mode 100644 hw/i386/intel_iommu.c create mode 100644 hw/i386/intel_iommu_internal.h create mode 100644 include/hw/i386/intel_iommu.h -- 1.9.1
[Qemu-devel] [PATCH v2 1/3] intel-iommu: introduce Intel IOMMU (VT-d) emulation
Add support for emulating Intel IOMMU according to the VT-d specification for the q35 chipset machine. Implement the logic for DMAR (DMA remapping) without PASID support. Use register-based invalidation for context-cache invalidation and IOTLB invalidation. Basic fault reporting and caching are not implemented yet. Signed-off-by: Le Tan --- hw/i386/Makefile.objs | 1 + hw/i386/intel_iommu.c | 911 + hw/i386/intel_iommu_internal.h | 257 include/hw/i386/intel_iommu.h | 75 4 files changed, 1244 insertions(+) create mode 100644 hw/i386/intel_iommu.c create mode 100644 hw/i386/intel_iommu_internal.h create mode 100644 include/hw/i386/intel_iommu.h diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs index 48014ab..6936111 100644 --- a/hw/i386/Makefile.objs +++ b/hw/i386/Makefile.objs @@ -2,6 +2,7 @@ obj-$(CONFIG_KVM) += kvm/ obj-y += multiboot.o smbios.o obj-y += pc.o pc_piix.o pc_q35.o obj-y += pc_sysfw.o +obj-y += intel_iommu.o obj-$(CONFIG_XEN) += ../xenpv/ xen/ obj-y += kvmvapic.o diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c new file mode 100644 index 000..718993a --- /dev/null +++ b/hw/i386/intel_iommu.c @@ -0,0 +1,911 @@ +/* + * QEMU emulation of an Intel IOMMU (VT-d) + * (DMA Remapping device) + * + * Copyright (C) 2013 Knut Omang, Oracle + * Copyright (C) 2014 Le Tan, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "hw/sysbus.h" +#include "exec/address-spaces.h" +#include "intel_iommu_internal.h" + + +/*#define DEBUG_INTEL_IOMMU*/ +#ifdef DEBUG_INTEL_IOMMU +#define VTD_DPRINTF(fmt, ...) \ +do { fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \ + ## __VA_ARGS__); } while (0) +#else +#define VTD_DPRINTF(fmt, ...) \ +do { } while (0) +#endif + +static inline void define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val, + uint64_t wmask, uint64_t w1cmask) +{ +stq_le_p(&s->csr[addr], val); +stq_le_p(&s->wmask[addr], wmask); +stq_le_p(&s->w1cmask[addr], w1cmask); +} + +static inline void define_quad_wo(IntelIOMMUState *s, hwaddr addr, + uint64_t mask) +{ +stq_le_p(&s->womask[addr], mask); +} + +static inline void define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val, + uint32_t wmask, uint32_t w1cmask) +{ +stl_le_p(&s->csr[addr], val); +stl_le_p(&s->wmask[addr], wmask); +stl_le_p(&s->w1cmask[addr], w1cmask); +} + +static inline void define_long_wo(IntelIOMMUState *s, hwaddr addr, + uint32_t mask) +{ +stl_le_p(&s->womask[addr], mask); +} + +/* "External" get/set operations */ +static inline void set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val) +{ +uint64_t oldval = ldq_le_p(&s->csr[addr]); +uint64_t wmask = ldq_le_p(&s->wmask[addr]); +uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]); +stq_le_p(&s->csr[addr], + ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val)); +} + +static inline void set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val) +{ +uint32_t oldval = ldl_le_p(&s->csr[addr]); +uint32_t wmask = ldl_le_p(&s->wmask[addr]); +uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]); +stl_le_p(&s->csr[addr], + ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val)); +} + +static inline uint64_t get_quad(IntelIOMMUState *s, hwaddr addr) +{ +uint64_t val = ldq_le_p(&s->csr[addr]); +uint64_t womask = ldq_le_p(&s->womask[addr]); +return val & ~womask; +} + + +static inline uint32_t get_long(IntelIOMMUState *s, hwaddr addr) +{ +uint32_t val = ldl_le_p(&s->csr[addr]); +uint32_t womask = ldl_le_p(&s->womask[addr]); +return val & ~womask; +} + +/* "Internal" get/set operations */ +static inline uint64_t get_quad_raw(IntelIOMMUState *s, hwaddr addr) +{ +return ldq_le_p(&s->csr[addr]); +} + +static inline uint32_t get_long_raw(IntelIOMMUState *s, hwaddr addr) +{ +return ldl_le_p(&s->csr[addr]); +} + +static inline uint32_t set_clear_mask_l
[Qemu-devel] [PATCH v2 2/3] intel-iommu: add DMAR table to ACPI tables
Expose Intel IOMMU to the BIOS. If object of TYPE_INTEL_IOMMU_DEVICE exists, add DMAR table to ACPI RSDT table. For now the DMAR table indicates that there is only one hardware unit without INTR_REMAP capability on the platform. Signed-off-by: Le Tan --- hw/i386/acpi-build.c | 41 ++ hw/i386/acpi-defs.h | 70 2 files changed, 111 insertions(+) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index ebc5f03..8241621 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -45,6 +45,7 @@ #include "hw/i386/ich9.h" #include "hw/pci/pci_bus.h" #include "hw/pci-host/q35.h" +#include "hw/i386/intel_iommu.h" #include "hw/i386/q35-acpi-dsdt.hex" #include "hw/i386/acpi-dsdt.hex" @@ -1316,6 +1317,31 @@ build_mcfg_q35(GArray *table_data, GArray *linker, AcpiMcfgInfo *info) } static void +build_dmar_q35(GArray *table_data, GArray *linker) +{ +int dmar_start = table_data->len; + +AcpiTableDmar *dmar; +AcpiDmarHardwareUnit *drhd; + +dmar = acpi_data_push(table_data, sizeof(*dmar)); +dmar->host_address_width = 0x26;/* 0x26 + 1 = 39 */ +dmar->flags = 0;/* No intr_remap for now */ + +/* DMAR Remapping Hardware Unit Definition structure */ +drhd = acpi_data_push(table_data, sizeof(*drhd)); +drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT); +drhd->length = cpu_to_le16(sizeof(*drhd)); /* No device scope now */ +drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL; +drhd->pci_segment = cpu_to_le16(0); +drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR); + +build_header(linker, table_data, (void *)(table_data->data + dmar_start), + "DMAR", table_data->len - dmar_start, 1); +} + + +static void build_dsdt(GArray *table_data, GArray *linker, AcpiMiscInfo *misc) { AcpiTableHeader *dsdt; @@ -1436,6 +1462,17 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg) return true; } +static bool acpi_has_iommu(void) +{ +bool ambiguous; +Object *intel_iommu; + +intel_iommu = object_resolve_path_type("", TYPE_INTEL_IOMMU_DEVICE, + &ambiguous); +return intel_iommu && !ambiguous; +} + + static void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables) { @@ -1497,6 +1534,10 @@ void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables) acpi_add_table(table_offsets, tables->table_data); build_mcfg_q35(tables->table_data, tables->linker, &mcfg); } +if (acpi_has_iommu()) { +acpi_add_table(table_offsets, tables->table_data); +build_dmar_q35(tables->table_data, tables->linker); +} /* Add tables supplied by user (if any) */ for (u = acpi_table_first(); u; u = acpi_table_next(u)) { diff --git a/hw/i386/acpi-defs.h b/hw/i386/acpi-defs.h index e93babb..9674825 100644 --- a/hw/i386/acpi-defs.h +++ b/hw/i386/acpi-defs.h @@ -314,4 +314,74 @@ struct AcpiTableMcfg { } QEMU_PACKED; typedef struct AcpiTableMcfg AcpiTableMcfg; +/* DMAR - DMA Remapping table r2.2 */ +struct AcpiTableDmar { +ACPI_TABLE_HEADER_DEF +uint8_t host_address_width; /* Maximum DMA physical addressability */ +uint8_t flags; +uint8_t reserved[10]; +} QEMU_PACKED; +typedef struct AcpiTableDmar AcpiTableDmar; + +/* Masks for Flags field above */ +#define ACPI_DMAR_INTR_REMAP(1) +#define ACPI_DMAR_X2APIC_OPT_OUT(2) + +/* + * DMAR sub-structures (Follow DMA Remapping table) + */ +#define ACPI_DMAR_SUB_HEADER_DEF /* Common ACPI DMAR sub-structure header */\ +uint16_t type; \ +uint16_t length; + +/* Values for sub-structure type for DMAR */ +enum { +ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, /* DRHD */ +ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, /* RMRR */ +ACPI_DMAR_TYPE_ATSR = 2,/* ATSR */ +ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3, /* RHSR */ +ACPI_DMAR_TYPE_ANDD = 4,/* ANDD */ +ACPI_DMAR_TYPE_RESERVED = 5 /* Reserved for furture use */ +}; + +/* + * Sub-structures for DMAR, correspond to Type in ACPI_DMAR_SUB_HEADER_DEF + */ + +/* DMAR Device Scope structures */ +struct AcpiDmarDeviceScope { +uint8_t type; +uint8_t length; +uint16_t reserved; +uint8_t enumeration_id; +uint8_t start_bus_number; +uint8_t path[0]; +} QEMU_PACKED; +typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope; + +/* Values for type in struct AcpiDmarDeviceScope */ +enum { +ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0, +ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1, +ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2, +ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3, +ACPI_DMAR_SCOPE_TYPE_HPET = 4, +ACPI_DMAR_SCOPE_TYPE_ACPI = 5, +ACPI_DMAR_SCOPE_TYPE_RESERVED = 6 /* Reserved for future use */ +}; + +/* 0: Hardware Unit Definition */ +struct AcpiDmarHardwareUnit { +ACPI_DMAR_SUB_HEADER_DEF +uin
[Qemu-devel] [PATCH v2 3/3] intel-iommu: add Intel IOMMU emulation to q35 and add a machine option "iommu" as a switch
Add Intel IOMMU emulation to q35 chipset and expose it to the guest. 1. Add a machine option. Users can use "-machine iommu=on|off" in the command line to enable/disable Intel IOMMU. The default is off. 2. Accroding to the machine option, q35 will initialize the Intel IOMMU and use pci_setup_iommu() to setup q35_host_dma_iommu() as the IOMMU function for the pci bus. 3. q35_host_dma_iommu() will return different address space according to the bus_num and devfn of the device. Signed-off-by: Le Tan --- hw/core/machine.c | 27 -- hw/pci-host/q35.c | 72 +++ include/hw/boards.h | 1 + include/hw/pci-host/q35.h | 2 ++ qemu-options.hx | 5 +++- vl.c | 4 +++ 6 files changed, 102 insertions(+), 9 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index cbba679..9b166e5 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -235,6 +235,20 @@ static void machine_set_firmware(Object *obj, const char *value, Error **errp) ms->firmware = g_strdup(value); } +static bool machine_get_iommu(Object *obj, Error **errp) +{ +MachineState *ms = MACHINE(obj); + +return ms->iommu; +} + +static void machine_set_iommu(Object *obj, bool value, Error **errp) +{ +MachineState *ms = MACHINE(obj); + +ms->iommu = value; +} + static void machine_initfn(Object *obj) { object_property_add_str(obj, "accel", @@ -270,10 +284,17 @@ static void machine_initfn(Object *obj) machine_set_dump_guest_core, NULL); object_property_add_bool(obj, "mem-merge", - machine_get_mem_merge, machine_set_mem_merge, NULL); -object_property_add_bool(obj, "usb", machine_get_usb, machine_set_usb, NULL); + machine_get_mem_merge, + machine_set_mem_merge, NULL); +object_property_add_bool(obj, "usb", + machine_get_usb, + machine_set_usb, NULL); object_property_add_str(obj, "firmware", -machine_get_firmware, machine_set_firmware, NULL); +machine_get_firmware, +machine_set_firmware, NULL); +object_property_add_bool(obj, "iommu", + machine_get_iommu, + machine_set_iommu, NULL); } static void machine_finalize(Object *obj) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index a0a3068..4abd0ee 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -347,6 +347,61 @@ static void mch_reset(DeviceState *qdev) mch_update(mch); } +static AddressSpace *q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) +{ +IntelIOMMUState *s = opaque; +VTDAddressSpace **pvtd_as; +VTDAddressSpace *vtd_as; +int bus_num = pci_bus_num(bus); + +assert(devfn >= 0); + +pvtd_as = s->address_spaces[bus_num]; +if (!pvtd_as) { +/* No corresponding free() */ +pvtd_as = g_malloc0(sizeof(VTDAddressSpace *) * +VTD_PCI_SLOT_MAX * VTD_PCI_FUNC_MAX); +s->address_spaces[bus_num] = pvtd_as; +} + +vtd_as = *(pvtd_as + devfn); +if (!vtd_as) { +vtd_as = g_malloc0(sizeof(*vtd_as)); +*(pvtd_as + devfn) = vtd_as; + +vtd_as->bus_num = bus_num; +vtd_as->devfn = devfn; +vtd_as->iommu_state = s; +memory_region_init_iommu(&vtd_as->iommu, OBJECT(s), &s->iommu_ops, + "intel_iommu", UINT64_MAX); +address_space_init(&vtd_as->as, &vtd_as->iommu, "intel_iommu"); +} + +return &vtd_as->as; +} + +static void mch_init_dmar(MCHPCIState *mch) +{ +Error *error = NULL; +PCIBus *pci_bus = PCI_BUS(qdev_get_parent_bus(DEVICE(mch))); + +mch->iommu = INTEL_IOMMU_DEVICE(object_new(TYPE_INTEL_IOMMU_DEVICE)); +qdev_set_parent_bus(DEVICE(mch->iommu), sysbus_get_default()); +object_property_set_bool(OBJECT(mch->iommu), true, "realized", &error); + +if (error) { +fprintf(stderr, "%s\n", error_get_pretty(error)); +error_free(error); +return; +} + +memory_region_add_subregion(mch->pci_address_space, +Q35_HOST_BRIDGE_IOMMU_ADDR, +&mch->iommu->csrmem); +pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu); +} + + static int mch_init(PCIDevice *d) { int i; @@ -363,13 +418,20 @@ static int mch_init(PCIDevice *d) memory_region_add_subregion_overlap(mch->system_memory, 0xa, &mch->smram_region, 1); memory_region_set_enabled(&am
[Qemu-devel] vfio in the guest: no available reset mechanism
Hi, I am testing vfio in L1 with my VT-d emulation project. I assigned one of the two AHCI controllers in L1 to L2 via vfio. After I ran the QEMU in L1, it complains that: qemu-system-x86_64: vfio: Cannot reset device :00:03.0, no available reset mechanism. qemu-system-x86_64: vfio: Cannot reset device :00:03.0, no available reset mechanism. Then L2 paused when the SeaBIOS executed in ahci_controller_setup(). I look into this and found that: val = ahci_ctrl_readl(ctrl, HOST_CTL); ahci_ctrl_writel(ctrl, HOST_CTL, val | HOST_CTL_AHCI_EN); When the BIOS tried to read the HOST_CTL, it returns 0x8002, which bit 2 (Interrupt Enable) is 1. The AHCI manual says that this bit should be cleared by default. So maybe L1 didn't reset the device before assigning it to L2? Then the BIOS tried to write back to HOST_CTL and it was stuck here. :( So can anyone give me some advice? About the state of PCI device or bus-level reset? Here is the detail of the environment and the way I did the vfio. 1. lspci in L1 said: 00:03.0 SATA controller [0106]: Intel Corporation 82801IR/IO/IH (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] [8086:2922] (rev 02) 00:1f.0 ISA bridge [0601]: Intel Corporation 82801IB (ICH9) LPC Interface Controller [8086:2918] (rev 02) 00:1f.2 SATA controller [0106]: Intel Corporation 82801IR/IO/IH (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] [8086:2922] (rev 02) 00:1f.3 SMBus [0c05]: Intel Corporation 82801I (ICH9 Family) SMBus Controller [8086:2930] (rev 02) 2. Unbind 00:03.0 and do vfio: modprobe -r vfio_iommu_type1 modprobe vfio_iommu_type1 allow_unsafe_interrupts=1 modprobe vfio-pci echo :00:03.0 > /sys/bus/pci/devices/\:00\:03.0/driver/unbind echo "8086 2922" > /sys/bus/pci/drivers/vfio-pci/new_id 3. run L2 with "-device vfio-pci,host=00:03.0" Any help is appreciated! Thanks very much! Regards, Le
Re: [Qemu-devel] vfio in the guest: no available reset mechanism
Hi Michael, 2014-07-30 21:16 GMT+08:00 Michael S. Tsirkin : > On Wed, Jul 30, 2014 at 08:24:04PM +0800, Le Tan wrote: >> Hi, >> I am testing vfio in L1 with my VT-d emulation project. I assigned one >> of the two AHCI controllers in L1 to L2 via vfio. After I ran the QEMU >> in L1, it complains that: >> qemu-system-x86_64: vfio: Cannot reset device :00:03.0, no >> available reset mechanism. >> qemu-system-x86_64: vfio: Cannot reset device :00:03.0, no >> available reset mechanism. >> >> Then L2 paused when the SeaBIOS executed in ahci_controller_setup(). I >> look into this and found that: >> val = ahci_ctrl_readl(ctrl, HOST_CTL); >> ahci_ctrl_writel(ctrl, HOST_CTL, val | HOST_CTL_AHCI_EN); >> When the BIOS tried to read the HOST_CTL, it returns 0x8002, which >> bit 2 (Interrupt Enable) is 1. The AHCI manual says that this bit >> should be cleared by default. So maybe L1 didn't reset the device >> before assigning it to L2? >> Then the BIOS tried to write back to HOST_CTL and it was stuck here. :( >> >> So can anyone give me some advice? About the state of PCI device or >> bus-level reset? >> >> Here is the detail of the environment and the way I did the vfio. >> 1. lspci in L1 said: >> 00:03.0 SATA controller [0106]: Intel Corporation 82801IR/IO/IH >> (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] [8086:2922] (rev 02) >> 00:1f.0 ISA bridge [0601]: Intel Corporation 82801IB (ICH9) LPC >> Interface Controller [8086:2918] (rev 02) >> 00:1f.2 SATA controller [0106]: Intel Corporation 82801IR/IO/IH >> (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] [8086:2922] (rev 02) >> 00:1f.3 SMBus [0c05]: Intel Corporation 82801I (ICH9 Family) SMBus >> Controller [8086:2930] (rev 02) >> 2. Unbind 00:03.0 and do vfio: >> modprobe -r vfio_iommu_type1 >> modprobe vfio_iommu_type1 allow_unsafe_interrupts=1 >> modprobe vfio-pci >> echo :00:03.0 > /sys/bus/pci/devices/\:00\:03.0/driver/unbind >> echo "8086 2922" > /sys/bus/pci/drivers/vfio-pci/new_id >> 3. run L2 with "-device vfio-pci,host=00:03.0" >> >> Any help is appreciated! Thanks very much! >> >> Regards, >> Le > > Clearly, bus level reset can't work for the root bus :) Thanks very much! I test the vfio with a second-bus ahci controller and it didn't complain about the lack of reset mechanism. :) And the return val of HOST_CTL is normal now (the same as emulated ahci controller). However, it still paused when the BIOS tried to write to the HOST_CTL. Do you have any idea? And we should just test vfio and legacy pci-assignment with second bus devices, not considering the root-bus devices? Thanks again! Regards, Le > -- > MST
Re: [Qemu-devel] vfio in the guest: no available reset mechanism
Hi Alex, 2014-07-30 22:46 GMT+08:00 Alex Williamson : > On Wed, 2014-07-30 at 22:16 +0800, Le Tan wrote: >> Hi Michael, >> >> 2014-07-30 21:16 GMT+08:00 Michael S. Tsirkin : >> > On Wed, Jul 30, 2014 at 08:24:04PM +0800, Le Tan wrote: >> >> Hi, >> >> I am testing vfio in L1 with my VT-d emulation project. I assigned one >> >> of the two AHCI controllers in L1 to L2 via vfio. After I ran the QEMU >> >> in L1, it complains that: >> >> qemu-system-x86_64: vfio: Cannot reset device :00:03.0, no >> >> available reset mechanism. >> >> qemu-system-x86_64: vfio: Cannot reset device :00:03.0, no >> >> available reset mechanism. >> >> >> >> Then L2 paused when the SeaBIOS executed in ahci_controller_setup(). I >> >> look into this and found that: >> >> val = ahci_ctrl_readl(ctrl, HOST_CTL); >> >> ahci_ctrl_writel(ctrl, HOST_CTL, val | HOST_CTL_AHCI_EN); >> >> When the BIOS tried to read the HOST_CTL, it returns 0x8002, which >> >> bit 2 (Interrupt Enable) is 1. The AHCI manual says that this bit >> >> should be cleared by default. So maybe L1 didn't reset the device >> >> before assigning it to L2? >> >> Then the BIOS tried to write back to HOST_CTL and it was stuck here. :( >> >> >> >> So can anyone give me some advice? About the state of PCI device or >> >> bus-level reset? >> >> >> >> Here is the detail of the environment and the way I did the vfio. >> >> 1. lspci in L1 said: >> >> 00:03.0 SATA controller [0106]: Intel Corporation 82801IR/IO/IH >> >> (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] [8086:2922] (rev 02) >> >> 00:1f.0 ISA bridge [0601]: Intel Corporation 82801IB (ICH9) LPC >> >> Interface Controller [8086:2918] (rev 02) >> >> 00:1f.2 SATA controller [0106]: Intel Corporation 82801IR/IO/IH >> >> (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] [8086:2922] (rev 02) >> >> 00:1f.3 SMBus [0c05]: Intel Corporation 82801I (ICH9 Family) SMBus >> >> Controller [8086:2930] (rev 02) >> >> 2. Unbind 00:03.0 and do vfio: >> >> modprobe -r vfio_iommu_type1 >> >> modprobe vfio_iommu_type1 allow_unsafe_interrupts=1 >> >> modprobe vfio-pci >> >> echo :00:03.0 > /sys/bus/pci/devices/\:00\:03.0/driver/unbind >> >> echo "8086 2922" > /sys/bus/pci/drivers/vfio-pci/new_id >> >> 3. run L2 with "-device vfio-pci,host=00:03.0" >> >> >> >> Any help is appreciated! Thanks very much! >> >> >> >> Regards, >> >> Le >> > >> > Clearly, bus level reset can't work for the root bus :) >> >> Thanks very much! >> I test the vfio with a second-bus ahci controller and it didn't >> complain about the lack of reset mechanism. :) And the return val of >> HOST_CTL is normal now (the same as emulated ahci controller). >> However, it still paused when the BIOS tried to write to the HOST_CTL. >> Do you have any idea? >> And we should just test vfio and legacy pci-assignment with second bus >> devices, not considering the root-bus devices? > > AHCI seems like a poor choice of device for this work, they typically > don't support any kind of reset and they can be troublesome even for the > L1 assignment. You really want something with FLR support so that both > the host and L1 guest can potentially reset the device. That said, you > may still run into bugs with a L1 guest directed FLR. Thanks, > > Alex > So what device do you think is suitable for the pci-assign test? e1000? I just tested it with sound card ac97 and USB controller. But I don't know how to attach them to a pcie-to-pci bridge, so maybe they weren't reset before being assigned to L2. But it seems that they can work. 1. With the sound card, I assigned it to L2 via both vfio and legacy pci-assign and I could hear the music played in L2 from host's speakers. Of course, the vfio also complained about the lack of reset mechanism. 2. With the USB controller, I used "-usb -usbdevice disk:file" to attach a USB disk to L1. But there were 4 related devices in L1, so I didn't know what should be assigned to L2. the info qtree was like this: dev: ich9-usb-uhci3, id "" masterbus = "usb-bus.0" firstport = 4 (0x4) bandwidth = 1280 (0x500) maxframes = 128 (0x80) addr = 1d.2 dev: ich9-usb-uhci2, id "" masterbus = "usb-bus.0" firstport = 2 (0x2)
Re: [Qemu-devel] vfio in the guest: no available reset mechanism
2014-08-01 23:25 GMT+08:00 Alex Williamson : > On Fri, 2014-08-01 at 09:35 +0800, Le Tan wrote: >> Hi Alex, >> >> 2014-07-30 22:46 GMT+08:00 Alex Williamson : >> > On Wed, 2014-07-30 at 22:16 +0800, Le Tan wrote: >> >> Hi Michael, >> >> >> >> 2014-07-30 21:16 GMT+08:00 Michael S. Tsirkin : >> >> > On Wed, Jul 30, 2014 at 08:24:04PM +0800, Le Tan wrote: >> >> >> Hi, >> >> >> I am testing vfio in L1 with my VT-d emulation project. I assigned one >> >> >> of the two AHCI controllers in L1 to L2 via vfio. After I ran the QEMU >> >> >> in L1, it complains that: >> >> >> qemu-system-x86_64: vfio: Cannot reset device :00:03.0, no >> >> >> available reset mechanism. >> >> >> qemu-system-x86_64: vfio: Cannot reset device :00:03.0, no >> >> >> available reset mechanism. >> >> >> >> >> >> Then L2 paused when the SeaBIOS executed in ahci_controller_setup(). I >> >> >> look into this and found that: >> >> >> val = ahci_ctrl_readl(ctrl, HOST_CTL); >> >> >> ahci_ctrl_writel(ctrl, HOST_CTL, val | HOST_CTL_AHCI_EN); >> >> >> When the BIOS tried to read the HOST_CTL, it returns 0x8002, which >> >> >> bit 2 (Interrupt Enable) is 1. The AHCI manual says that this bit >> >> >> should be cleared by default. So maybe L1 didn't reset the device >> >> >> before assigning it to L2? >> >> >> Then the BIOS tried to write back to HOST_CTL and it was stuck here. :( >> >> >> >> >> >> So can anyone give me some advice? About the state of PCI device or >> >> >> bus-level reset? >> >> >> >> >> >> Here is the detail of the environment and the way I did the vfio. >> >> >> 1. lspci in L1 said: >> >> >> 00:03.0 SATA controller [0106]: Intel Corporation 82801IR/IO/IH >> >> >> (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] [8086:2922] (rev 02) >> >> >> 00:1f.0 ISA bridge [0601]: Intel Corporation 82801IB (ICH9) LPC >> >> >> Interface Controller [8086:2918] (rev 02) >> >> >> 00:1f.2 SATA controller [0106]: Intel Corporation 82801IR/IO/IH >> >> >> (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] [8086:2922] (rev 02) >> >> >> 00:1f.3 SMBus [0c05]: Intel Corporation 82801I (ICH9 Family) SMBus >> >> >> Controller [8086:2930] (rev 02) >> >> >> 2. Unbind 00:03.0 and do vfio: >> >> >> modprobe -r vfio_iommu_type1 >> >> >> modprobe vfio_iommu_type1 allow_unsafe_interrupts=1 >> >> >> modprobe vfio-pci >> >> >> echo :00:03.0 > /sys/bus/pci/devices/\:00\:03.0/driver/unbind >> >> >> echo "8086 2922" > /sys/bus/pci/drivers/vfio-pci/new_id >> >> >> 3. run L2 with "-device vfio-pci,host=00:03.0" >> >> >> >> >> >> Any help is appreciated! Thanks very much! >> >> >> >> >> >> Regards, >> >> >> Le >> >> > >> >> > Clearly, bus level reset can't work for the root bus :) >> >> >> >> Thanks very much! >> >> I test the vfio with a second-bus ahci controller and it didn't >> >> complain about the lack of reset mechanism. :) And the return val of >> >> HOST_CTL is normal now (the same as emulated ahci controller). >> >> However, it still paused when the BIOS tried to write to the HOST_CTL. >> >> Do you have any idea? >> >> And we should just test vfio and legacy pci-assignment with second bus >> >> devices, not considering the root-bus devices? >> > >> > AHCI seems like a poor choice of device for this work, they typically >> > don't support any kind of reset and they can be troublesome even for the >> > L1 assignment. You really want something with FLR support so that both >> > the host and L1 guest can potentially reset the device. That said, you >> > may still run into bugs with a L1 guest directed FLR. Thanks, >> > >> > Alex >> > >> >> So what device do you think is suitable for the pci-assign test? e1000? >> I just tested it with sound card ac97 and USB controller. But I don't >> know how to attach them to a pcie-to-pci bridge, so maybe they weren't >> reset before being assigned to L2.
[Qemu-devel] About the VT-d features that q35 chipset supports
Hi, I am now adding features to the VT-d emulation for q35 chipset. I think it is good to know what features q35 chipset supports exactly. However I can't find materials about this. Does anyone know this? Or it will be fine if someone can tell me the value of the Capability Register and Extended Capability Register of the VT-d on q35. Any help is appreciated! Thanks very much! Regards, Le
Re: [Qemu-devel] About the VT-d features that q35 chipset supports
Hi Jan, 2014-08-06 1:08 GMT+08:00 Jan Kiszka : > On 2014-08-05 14:55, Le Tan wrote: >> Hi, >> I am now adding features to the VT-d emulation for q35 chipset. I >> think it is good to know what features q35 chipset supports exactly. >> However I can't find materials about this. Does anyone know this? Or >> it will be fine if someone can tell me the value of the Capability >> Register and Extended Capability Register of the VT-d on q35. > > The kernel reports those registers during boot-up, so we would just need > "dmesg | grep dmar" from a real 82Q35 box (Intel 3 Series). Thanks! But my computer seems not to be Intel 3 Series. It seems to be "7 Series/C210 Series Chipset Family". What's more, Acer seems to disable and hide the VT-d support of it. Le > Jan > >> Any help is appreciated! >> Thanks very much! >> >> Regards, >> Le >> > >
[Qemu-devel] [PATCH v3 0/5] intel-iommu: introduce Intel IOMMU (VT-d) emulation to q35 chipset
Hi, These patches are intended to introduce Intel IOMMU (VT-d) emulation to q35 chipset. The major job in these patches is to add support for emulating Intel IOMMU according to the VT-d specification, including basic responses to CSRs accesses, the logics of DMAR (DMA remapping) and DMA memory address translations. Features implemented for now are: 1. Response to important CSRs accesses; 2. DMAR (DMA remapping) without PASID support; 3. Primary fault logging; 4. Support both register-based and queued invalidation for IOTLB and context cache invalidation; 5. Add DMAR table to ACPI tables to expose VT-d to BIOS; 6. Add "-machine iommu=on|off" option to enable/disable VT-d; 7. Only one DMAR unit for all the devices of PCI Segment 0. Testing: 1. L1 guest with Linux with intel_iommu=on can interact with VT-d and boot smoothly, and there exists information about VT-d in the log of kernel; 2. Run L1 with VT-d, L2 guest with Linux can boot smoothly withou PCI device passthrough; 3. Run L1 with VT-d and "-soundhw ac97 (QEMU_AUDIO_DRV=alsa)", then assign the sound card to L2; L2 can boot smoothly with legacy PCI assignment and I can hear the music played in L2 from the host speakers; 4. Jailhouse hypervisor can run smoothly(tested by Jan). 5. Run L1 with VT-d and e1000 network card, then assign e1000 to L2; L2 will be STUCK when booting. This still remains unsolved now. As far as I know, I suppose that the L2 crashes when doing e1000_probe(). The QEMU of L1 will dump something with "KVM: entry failed, hardware error 0x0", and the KVM of host will print "nested_vmx_exit_handled failed vm entry 7". Unlike assigning the sound card, after being assigned to L2, there is no translation entry of e1000 through VT-d, which I think means that e1000 doesn't issue any DMA access during the boot of L2. Sometimes the kernel of L2 will print "divide error" during booting. Maybe it results from the lack of reset mechanism. 6. VFIO is tested and is similar to legacy pci assignment. Discussion: 1. There is one functionality called Zero-Length-Read (ZLR) which supports zero length DMA read requests to write-only pages. If the VT-d emulation supports ZLR, we need to know the exact length of one access. For now can QEMU express zero-length requests? TODO: 1. Context cache and IOTLB cache; 2. Fix the bug of legacy PCI assignment; Changes since v2: *address reviewing suggestions given by Jan -add support for primary fault logging -add support for queued invalidation Changes since v1: *address reviewing suggestions given by Michael, Paolo, Stefan and Jan -split intel_iommu.h to include/hw/i386/intel_iommu.h and hw/i386/intel_iommu_internal.h -change the copyright information -change D() to VTD_DPRINTF() -remove dead code -rename constant definitions with consistent prefix VTD_ -rename some struct definitions according to QEMU standard -rename some CSRs access functions -use endian-save functions to access CSRs -change machine option to "iommu=on|off" Thanks very much! Git trees: https://github.com/tamlok/qemu Le Tan (5): iommu: add is_write as a parameter to the translate function of MemoryRegionIOMMUOps intel-iommu: introduce Intel IOMMU (VT-d) emulation intel-iommu: add DMAR table to ACPI tables intel-iommu: add Intel IOMMU emulation to q35 and add a machine option "iommu" as a switch intel-iommu: add supports for queued invalidation interface exec.c |2 +- hw/alpha/typhoon.c |3 +- hw/core/machine.c | 27 +- hw/i386/Makefile.objs |1 + hw/i386/acpi-build.c | 41 + hw/i386/acpi-defs.h| 70 ++ hw/i386/intel_iommu.c | 1722 hw/i386/intel_iommu_internal.h | 358 + hw/pci-host/apb.c |3 +- hw/pci-host/q35.c | 64 +- hw/ppc/spapr_iommu.c |3 +- include/exec/memory.h |2 +- include/hw/boards.h|1 + include/hw/i386/intel_iommu.h | 90 +++ include/hw/pci-host/q35.h |2 + qemu-options.hx|5 +- vl.c |4 + 17 files changed, 2384 insertions(+), 14 deletions(-) create mode 100644 hw/i386/intel_iommu.c create mode 100644 hw/i386/intel_iommu_internal.h create mode 100644 include/hw/i386/intel_iommu.h -- 1.9.1
[Qemu-devel] [PATCH v3 5/5] intel-iommu: add supports for queued invalidation interface
Add supports for queued invalidation interface, an expended invalidation interface with extended capabilities. Signed-off-by: Le Tan --- hw/i386/intel_iommu.c | 381 - hw/i386/intel_iommu_internal.h | 15 +- 2 files changed, 393 insertions(+), 3 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index b3a4f78..0e7d62d 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -332,6 +332,43 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, } } +/* Handle Invalidation Queue Errors of queued invalidation interface error + * conditions. + */ +static void vtd_handle_inv_queue_error(IntelIOMMUState *s) +{ +uint32_t fsts_reg = get_long_raw(s, DMAR_FSTS_REG); + +set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_IQE); +vtd_generate_fault_event(s, fsts_reg); +} + +/* Set the IWC field and try to generate an invalidation completion interrupt */ +static void vtd_generate_completion_event(IntelIOMMUState *s) +{ +VTD_DPRINTF(INV, "completes an invalidation wait command with " +"Interrupt Flag"); +if (get_long_raw(s, DMAR_ICS_REG) & VTD_ICS_IWC) { +VTD_DPRINTF(INV, "there is a previous interrupt condition to be " +"serviced by software, " +"new invalidation event is not generated"); +return; +} + +set_clear_mask_long(s, DMAR_ICS_REG, 0, VTD_ICS_IWC); +set_clear_mask_long(s, DMAR_IECTL_REG, 0, VTD_IECTL_IP); +if (get_long_raw(s, DMAR_IECTL_REG) & VTD_IECTL_IM) { +VTD_DPRINTF(INV, "IM filed in IECTL_REG is set, new invalidation " +"event is not generated"); +return; +} else { +/* Generate the interrupt event */ +vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG); +set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0); +} + +} + static inline bool root_entry_present(VTDRootEntry *root) { return root->val & VTD_ROOT_ENTRY_P; @@ -809,6 +846,54 @@ static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val) return iaig; } +static inline bool queued_inv_enable_check(IntelIOMMUState *s) +{ +return s->iq_tail == 0; +} + +static inline bool queued_inv_disable_check(IntelIOMMUState *s) +{ +return s->qi_enabled && (s->iq_tail == s->iq_head) && + (s->iq_last_desc_type == VTD_INV_DESC_WAIT); +} + +static void handle_gcmd_qie(IntelIOMMUState *s, bool en) +{ +uint64_t iqa_val = get_quad_raw(s, DMAR_IQA_REG); +VTD_DPRINTF(INV, "Queued Invalidation Enable %s", (en ? "on" : "off")); + +if (en) { +if (queued_inv_enable_check(s)) { +s->iq = iqa_val & VTD_IQA_IQA_MASK; +/* 2^(x+8) entries */ +s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8); +s->qi_enabled = true; +VTD_DPRINTF(INV, "DMAR_IQA_REG 0x%"PRIx64, iqa_val); +VTD_DPRINTF(INV, "Invalidation Queue addr 0x%"PRIx64 " size %d", +s->iq, s->iq_size); +/* Ok - report back to driver */ +set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES); +} else { +VTD_DPRINTF(GENERAL, "error: can't enable Queued Invalidation: " +"tail %"PRIu16, s->iq_tail); +} +} else { +if (queued_inv_disable_check(s)) { +/* disable Queued Invalidation */ +set_quad_raw(s, DMAR_IQH_REG, 0); +s->iq_head = 0; +s->qi_enabled = false; +/* Ok - report back to driver */ +set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_QIES, 0); +} else { +VTD_DPRINTF(GENERAL, "error: can't disable Queued Invalidation: " +"head %"PRIu16 ", tail %"PRIu16 +", last_descriptor %"PRIu8, +s->iq_head, s->iq_tail, s->iq_last_desc_type); +} +} +} + /* Set Root Table Pointer */ static void handle_gcmd_srtp(IntelIOMMUState *s) { @@ -854,6 +939,10 @@ static void handle_gcmd_write(IntelIOMMUState *s) /* Set/update the root-table pointer */ handle_gcmd_srtp(s); } +if (changed & VTD_GCMD_QIE) { +/* Queued Invalidation Enable */ +handle_gcmd_qie(s, val & VTD_GCMD_QIE); +} } /* Handle write to Context Command Register */ @@ -864,6 +953,11 @@ static void handle_ccmd_write(IntelIOMMUState *s) /* Context-cache invalidation request */ if (val & VTD_CCMD_ICC) { +if (s->qi_enabled) { +VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, " +
[Qemu-devel] [PATCH v3 2/5] intel-iommu: introduce Intel IOMMU (VT-d) emulation
Add support for emulating Intel IOMMU according to the VT-d specification for the q35 chipset machine. Implement the logics for DMAR (DMA remapping) without PASID support. The emulation supports register-based invalidation and primary fault logging. Signed-off-by: Le Tan --- hw/i386/Makefile.objs |1 + hw/i386/intel_iommu.c | 1345 hw/i386/intel_iommu_internal.h | 345 +++ include/hw/i386/intel_iommu.h | 90 +++ 4 files changed, 1781 insertions(+) create mode 100644 hw/i386/intel_iommu.c create mode 100644 hw/i386/intel_iommu_internal.h create mode 100644 include/hw/i386/intel_iommu.h diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs index 48014ab..6936111 100644 --- a/hw/i386/Makefile.objs +++ b/hw/i386/Makefile.objs @@ -2,6 +2,7 @@ obj-$(CONFIG_KVM) += kvm/ obj-y += multiboot.o smbios.o obj-y += pc.o pc_piix.o pc_q35.o obj-y += pc_sysfw.o +obj-y += intel_iommu.o obj-$(CONFIG_XEN) += ../xenpv/ xen/ obj-y += kvmvapic.o diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c new file mode 100644 index 000..b3a4f78 --- /dev/null +++ b/hw/i386/intel_iommu.c @@ -0,0 +1,1345 @@ +/* + * QEMU emulation of an Intel IOMMU (VT-d) + * (DMA Remapping device) + * + * Copyright (C) 2013 Knut Omang, Oracle + * Copyright (C) 2014 Le Tan, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "hw/sysbus.h" +#include "exec/address-spaces.h" +#include "intel_iommu_internal.h" + + +/*#define DEBUG_INTEL_IOMMU*/ +#ifdef DEBUG_INTEL_IOMMU +enum { +DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG, +}; +#define VTD_DBGBIT(x) (1 << DEBUG_##x) +static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR) | + VTD_DBGBIT(FLOG); + +#define VTD_DPRINTF(what, fmt, ...) do { \ +if (vtd_dbgflags & VTD_DBGBIT(what)) { \ +fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \ +## __VA_ARGS__); } \ +} while (0) +#else +#define VTD_DPRINTF(what, fmt, ...) do {} while (0) +#endif + +static inline void define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val, + uint64_t wmask, uint64_t w1cmask) +{ +stq_le_p(&s->csr[addr], val); +stq_le_p(&s->wmask[addr], wmask); +stq_le_p(&s->w1cmask[addr], w1cmask); +} + +static inline void define_quad_wo(IntelIOMMUState *s, hwaddr addr, + uint64_t mask) +{ +stq_le_p(&s->womask[addr], mask); +} + +static inline void define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val, + uint32_t wmask, uint32_t w1cmask) +{ +stl_le_p(&s->csr[addr], val); +stl_le_p(&s->wmask[addr], wmask); +stl_le_p(&s->w1cmask[addr], w1cmask); +} + +static inline void define_long_wo(IntelIOMMUState *s, hwaddr addr, + uint32_t mask) +{ +stl_le_p(&s->womask[addr], mask); +} + +/* "External" get/set operations */ +static inline void set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val) +{ +uint64_t oldval = ldq_le_p(&s->csr[addr]); +uint64_t wmask = ldq_le_p(&s->wmask[addr]); +uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]); +stq_le_p(&s->csr[addr], + ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val)); +} + +static inline void set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val) +{ +uint32_t oldval = ldl_le_p(&s->csr[addr]); +uint32_t wmask = ldl_le_p(&s->wmask[addr]); +uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]); +stl_le_p(&s->csr[addr], + ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val)); +} + +static inline uint64_t get_quad(IntelIOMMUState *s, hwaddr addr) +{ +uint64_t val = ldq_le_p(&s->csr[addr]); +uint64_t womask = ldq_le_p(&s->womask[addr]); +return val & ~womask; +} + + +static inline uint32_t get_long(IntelIOMMUState *s, hwaddr addr) +{ +uint32_t val = ldl_le_p(&s->csr[addr]); +uint32_t womask = ldl_le_p(&s->womask[addr]); +return val & ~womask; +} + +/* "Internal" get/set operations */ +static inline uint64_t get_quad_raw(Intel
[Qemu-devel] [PATCH v3 3/5] intel-iommu: add DMAR table to ACPI tables
Expose Intel IOMMU to the BIOS. If object of TYPE_INTEL_IOMMU_DEVICE exists, add DMAR table to ACPI RSDT table. For now the DMAR table indicates that there is only one hardware unit without INTR_REMAP capability on the platform. Signed-off-by: Le Tan --- hw/i386/acpi-build.c | 41 ++ hw/i386/acpi-defs.h | 70 2 files changed, 111 insertions(+) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 816c6d9..595f501 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -47,6 +47,7 @@ #include "hw/i386/ich9.h" #include "hw/pci/pci_bus.h" #include "hw/pci-host/q35.h" +#include "hw/i386/intel_iommu.h" #include "hw/i386/q35-acpi-dsdt.hex" #include "hw/i386/acpi-dsdt.hex" @@ -1350,6 +1351,31 @@ build_mcfg_q35(GArray *table_data, GArray *linker, AcpiMcfgInfo *info) } static void +build_dmar_q35(GArray *table_data, GArray *linker) +{ +int dmar_start = table_data->len; + +AcpiTableDmar *dmar; +AcpiDmarHardwareUnit *drhd; + +dmar = acpi_data_push(table_data, sizeof(*dmar)); +dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1; +dmar->flags = 0;/* No intr_remap for now */ + +/* DMAR Remapping Hardware Unit Definition structure */ +drhd = acpi_data_push(table_data, sizeof(*drhd)); +drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT); +drhd->length = cpu_to_le16(sizeof(*drhd)); /* No device scope now */ +drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL; +drhd->pci_segment = cpu_to_le16(0); +drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR); + +build_header(linker, table_data, (void *)(table_data->data + dmar_start), + "DMAR", table_data->len - dmar_start, 1); +} + + +static void build_dsdt(GArray *table_data, GArray *linker, AcpiMiscInfo *misc) { AcpiTableHeader *dsdt; @@ -1470,6 +1496,17 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg) return true; } +static bool acpi_has_iommu(void) +{ +bool ambiguous; +Object *intel_iommu; + +intel_iommu = object_resolve_path_type("", TYPE_INTEL_IOMMU_DEVICE, + &ambiguous); +return intel_iommu && !ambiguous; +} + + static void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables) { @@ -1539,6 +1576,10 @@ void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables) acpi_add_table(table_offsets, tables->table_data); build_mcfg_q35(tables->table_data, tables->linker, &mcfg); } +if (acpi_has_iommu()) { +acpi_add_table(table_offsets, tables->table_data); +build_dmar_q35(tables->table_data, tables->linker); +} /* Add tables supplied by user (if any) */ for (u = acpi_table_first(); u; u = acpi_table_next(u)) { diff --git a/hw/i386/acpi-defs.h b/hw/i386/acpi-defs.h index e93babb..9674825 100644 --- a/hw/i386/acpi-defs.h +++ b/hw/i386/acpi-defs.h @@ -314,4 +314,74 @@ struct AcpiTableMcfg { } QEMU_PACKED; typedef struct AcpiTableMcfg AcpiTableMcfg; +/* DMAR - DMA Remapping table r2.2 */ +struct AcpiTableDmar { +ACPI_TABLE_HEADER_DEF +uint8_t host_address_width; /* Maximum DMA physical addressability */ +uint8_t flags; +uint8_t reserved[10]; +} QEMU_PACKED; +typedef struct AcpiTableDmar AcpiTableDmar; + +/* Masks for Flags field above */ +#define ACPI_DMAR_INTR_REMAP(1) +#define ACPI_DMAR_X2APIC_OPT_OUT(2) + +/* + * DMAR sub-structures (Follow DMA Remapping table) + */ +#define ACPI_DMAR_SUB_HEADER_DEF /* Common ACPI DMAR sub-structure header */\ +uint16_t type; \ +uint16_t length; + +/* Values for sub-structure type for DMAR */ +enum { +ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, /* DRHD */ +ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, /* RMRR */ +ACPI_DMAR_TYPE_ATSR = 2,/* ATSR */ +ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3, /* RHSR */ +ACPI_DMAR_TYPE_ANDD = 4,/* ANDD */ +ACPI_DMAR_TYPE_RESERVED = 5 /* Reserved for furture use */ +}; + +/* + * Sub-structures for DMAR, correspond to Type in ACPI_DMAR_SUB_HEADER_DEF + */ + +/* DMAR Device Scope structures */ +struct AcpiDmarDeviceScope { +uint8_t type; +uint8_t length; +uint16_t reserved; +uint8_t enumeration_id; +uint8_t start_bus_number; +uint8_t path[0]; +} QEMU_PACKED; +typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope; + +/* Values for type in struct AcpiDmarDeviceScope */ +enum { +ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0, +ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1, +ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2, +ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3, +ACPI_DMAR_SCOPE_TYPE_HPET = 4, +ACPI_DMAR_SCOPE_TYPE_ACPI = 5, +ACPI_DMAR_SCOPE_TYPE_RESERVED = 6 /* Reserved for future use */ +}; + +/* 0: Hardware Unit Definition */ +struct AcpiDmarHardwareUnit { +ACPI_DMAR_SUB_HEADER_DEF +uin
[Qemu-devel] [PATCH v3 1/5] iommu: add is_write as a parameter to the translate function of MemoryRegionIOMMUOps
Add a bool variable is_write as a parameter to the translate function of MemoryRegionIOMMUOps to indicate the operation of the access. It can be used for correct fault reporting from within the callback. Change the interface of related functions. Signed-off-by: Le Tan --- exec.c| 2 +- hw/alpha/typhoon.c| 3 ++- hw/pci-host/apb.c | 3 ++- hw/ppc/spapr_iommu.c | 3 ++- include/exec/memory.h | 2 +- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/exec.c b/exec.c index 765bd94..5ccc106 100644 --- a/exec.c +++ b/exec.c @@ -373,7 +373,7 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, break; } -iotlb = mr->iommu_ops->translate(mr, addr); +iotlb = mr->iommu_ops->translate(mr, addr, is_write); addr = ((iotlb.translated_addr & ~iotlb.addr_mask) | (addr & iotlb.addr_mask)); len = MIN(len, (addr | iotlb.addr_mask) - addr + 1); diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c index 67a1070..31947d9 100644 --- a/hw/alpha/typhoon.c +++ b/hw/alpha/typhoon.c @@ -660,7 +660,8 @@ static bool window_translate(TyphoonWindow *win, hwaddr addr, /* Handle PCI-to-system address translation. */ /* TODO: A translation failure here ought to set PCI error codes on the Pchip and generate a machine check interrupt. */ -static IOMMUTLBEntry typhoon_translate_iommu(MemoryRegion *iommu, hwaddr addr) +static IOMMUTLBEntry typhoon_translate_iommu(MemoryRegion *iommu, hwaddr addr, + bool is_write) { TyphoonPchip *pchip = container_of(iommu, TyphoonPchip, iommu); IOMMUTLBEntry ret; diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c index d238a84..0e0e0ee 100644 --- a/hw/pci-host/apb.c +++ b/hw/pci-host/apb.c @@ -203,7 +203,8 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &is->iommu_as; } -static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr) +static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr, + bool is_write) { IOMMUState *is = container_of(iommu, IOMMUState, iommu); hwaddr baseaddr, offset; diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index f6e32a4..6c91d8e 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -59,7 +59,8 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn) return NULL; } -static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr) +static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, + bool is_write) { sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu); uint64_t tce; diff --git a/include/exec/memory.h b/include/exec/memory.h index e2c8e3e..834543d 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -129,7 +129,7 @@ typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps; struct MemoryRegionIOMMUOps { /* Return a TLB entry that contains a given address. */ -IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr); +IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is_write); }; typedef struct CoalescedMemoryRange CoalescedMemoryRange; -- 1.9.1
[Qemu-devel] [PATCH v3 4/5] intel-iommu: add Intel IOMMU emulation to q35 and add a machine option "iommu" as a switch
Add Intel IOMMU emulation to q35 chipset and expose it to the guest. 1. Add a machine option. Users can use "-machine iommu=on|off" in the command line to enable/disable Intel IOMMU. The default is off. 2. Accroding to the machine option, q35 will initialize the Intel IOMMU and use pci_setup_iommu() to setup q35_host_dma_iommu() as the IOMMU function for the pci bus. 3. q35_host_dma_iommu() will return different address space according to the bus_num and devfn of the device. Signed-off-by: Le Tan --- hw/core/machine.c | 27 +--- hw/pci-host/q35.c | 64 +++ include/hw/boards.h | 1 + include/hw/pci-host/q35.h | 2 ++ qemu-options.hx | 5 +++- vl.c | 4 +++ 6 files changed, 94 insertions(+), 9 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index 7a66c57..f0046d6 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -235,6 +235,20 @@ static void machine_set_firmware(Object *obj, const char *value, Error **errp) ms->firmware = g_strdup(value); } +static bool machine_get_iommu(Object *obj, Error **errp) +{ +MachineState *ms = MACHINE(obj); + +return ms->iommu; +} + +static void machine_set_iommu(Object *obj, bool value, Error **errp) +{ +MachineState *ms = MACHINE(obj); + +ms->iommu = value; +} + static void machine_initfn(Object *obj) { object_property_add_str(obj, "accel", @@ -270,10 +284,17 @@ static void machine_initfn(Object *obj) machine_set_dump_guest_core, NULL); object_property_add_bool(obj, "mem-merge", - machine_get_mem_merge, machine_set_mem_merge, NULL); -object_property_add_bool(obj, "usb", machine_get_usb, machine_set_usb, NULL); + machine_get_mem_merge, + machine_set_mem_merge, NULL); +object_property_add_bool(obj, "usb", + machine_get_usb, + machine_set_usb, NULL); object_property_add_str(obj, "firmware", -machine_get_firmware, machine_set_firmware, NULL); +machine_get_firmware, +machine_set_firmware, NULL); +object_property_add_bool(obj, "iommu", + machine_get_iommu, + machine_set_iommu, NULL); } static void machine_finalize(Object *obj) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index a0a3068..3342711 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -347,6 +347,53 @@ static void mch_reset(DeviceState *qdev) mch_update(mch); } +static AddressSpace *q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) +{ +IntelIOMMUState *s = opaque; +VTDAddressSpace **pvtd_as; +VTDAddressSpace *vtd_as; +int bus_num = pci_bus_num(bus); + +assert(devfn >= 0); + +pvtd_as = s->address_spaces[bus_num]; +if (!pvtd_as) { +/* No corresponding free() */ +pvtd_as = g_malloc0(sizeof(VTDAddressSpace *) * +VTD_PCI_SLOT_MAX * VTD_PCI_FUNC_MAX); +s->address_spaces[bus_num] = pvtd_as; +} + +vtd_as = *(pvtd_as + devfn); +if (!vtd_as) { +vtd_as = g_malloc0(sizeof(*vtd_as)); +*(pvtd_as + devfn) = vtd_as; + +vtd_as->bus_num = bus_num; +vtd_as->devfn = devfn; +vtd_as->iommu_state = s; +memory_region_init_iommu(&vtd_as->iommu, OBJECT(s), &s->iommu_ops, + "intel_iommu", UINT64_MAX); +address_space_init(&vtd_as->as, &vtd_as->iommu, "intel_iommu"); +} + +return &vtd_as->as; +} + +static void mch_init_dmar(MCHPCIState *mch) +{ +PCIBus *pci_bus = PCI_BUS(qdev_get_parent_bus(DEVICE(mch))); + +mch->iommu = INTEL_IOMMU_DEVICE(qdev_create(NULL, TYPE_INTEL_IOMMU_DEVICE)); +object_property_add_child(OBJECT(mch), "intel-iommu", + OBJECT(mch->iommu), NULL); +qdev_init_nofail(DEVICE(mch->iommu)); +sysbus_mmio_map(SYS_BUS_DEVICE(mch->iommu), 0, Q35_HOST_BRIDGE_IOMMU_ADDR); + +pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu); +} + + static int mch_init(PCIDevice *d) { int i; @@ -363,13 +410,20 @@ static int mch_init(PCIDevice *d) memory_region_add_subregion_overlap(mch->system_memory, 0xa, &mch->smram_region, 1); memory_region_set_enabled(&mch->smram_region, false); -init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory, mch->pci_address_space, - &mch->pam_regions[0], PAM_BIOS_BASE, PAM_BIOS_SIZE); +init_pam(DEVICE(mch), mch->ram_mem
Re: [Qemu-devel] [PATCH v3 2/5] intel-iommu: introduce Intel IOMMU (VT-d) emulation
Hi Jan, 2014-08-12 15:34 GMT+08:00 Jan Kiszka : > On 2014-08-11 09:04, Le Tan wrote: >> Add support for emulating Intel IOMMU according to the VT-d specification for >> the q35 chipset machine. Implement the logics for DMAR (DMA remapping) >> without >> PASID support. The emulation supports register-based invalidation and primary >> fault logging. > > Some arbitrary comments below (means, I didn't read every line and > likely missed some things). In general, this looks and works pretty good! > >> >> Signed-off-by: Le Tan >> --- >> hw/i386/Makefile.objs |1 + >> hw/i386/intel_iommu.c | 1345 >> >> hw/i386/intel_iommu_internal.h | 345 +++ >> include/hw/i386/intel_iommu.h | 90 +++ >> 4 files changed, 1781 insertions(+) >> create mode 100644 hw/i386/intel_iommu.c >> create mode 100644 hw/i386/intel_iommu_internal.h >> create mode 100644 include/hw/i386/intel_iommu.h >> >> diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs >> index 48014ab..6936111 100644 >> --- a/hw/i386/Makefile.objs >> +++ b/hw/i386/Makefile.objs >> @@ -2,6 +2,7 @@ obj-$(CONFIG_KVM) += kvm/ >> obj-y += multiboot.o smbios.o >> obj-y += pc.o pc_piix.o pc_q35.o >> obj-y += pc_sysfw.o >> +obj-y += intel_iommu.o >> obj-$(CONFIG_XEN) += ../xenpv/ xen/ >> >> obj-y += kvmvapic.o >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >> new file mode 100644 >> index 000..b3a4f78 >> --- /dev/null >> +++ b/hw/i386/intel_iommu.c >> @@ -0,0 +1,1345 @@ >> +/* >> + * QEMU emulation of an Intel IOMMU (VT-d) >> + * (DMA Remapping device) >> + * >> + * Copyright (C) 2013 Knut Omang, Oracle >> + * Copyright (C) 2014 Le Tan, >> + * >> + * This program is free software; you can redistribute it and/or modify >> + * it under the terms of the GNU General Public License as published by >> + * the Free Software Foundation; either version 2 of the License, or >> + * (at your option) any later version. >> + >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. >> + >> + * You should have received a copy of the GNU General Public License along >> + * with this program; if not, see <http://www.gnu.org/licenses/>. >> + */ >> + >> +#include "hw/sysbus.h" >> +#include "exec/address-spaces.h" >> +#include "intel_iommu_internal.h" >> + >> + >> +/*#define DEBUG_INTEL_IOMMU*/ >> +#ifdef DEBUG_INTEL_IOMMU >> +enum { >> +DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG, >> +}; >> +#define VTD_DBGBIT(x) (1 << DEBUG_##x) >> +static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR) | >> + VTD_DBGBIT(FLOG); >> + >> +#define VTD_DPRINTF(what, fmt, ...) do { \ >> +if (vtd_dbgflags & VTD_DBGBIT(what)) { \ >> +fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \ >> +## __VA_ARGS__); } \ >> +} while (0) >> +#else >> +#define VTD_DPRINTF(what, fmt, ...) do {} while (0) >> +#endif >> + >> +static inline void define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t >> val, >> + uint64_t wmask, uint64_t w1cmask) > > In general, don't declare functions inline needlessly. It makes sense > for trivial ones you export via a header, but even a 2- or 3-liner like > this can be more efficient as stand-alone function. Anything bigger > definitely does not deserve that tag. > > Inline is just a hint to the compiler anyway, so you can perfectly leave > it out for any almost-trivial static function. OK, I will delete most of them except those just have one "return" line. >> +{ >> +stq_le_p(&s->csr[addr], val); >> +stq_le_p(&s->wmask[addr], wmask); >> +stq_le_p(&s->w1cmask[addr], w1cmask); >> +} >> + >> +static inline void define_quad_wo(IntelIOMMUState *s, hwaddr addr, >> + uint64_t mask) >> +{ >> +stq_le_p(&s->womask[addr], mask); >> +} >> + >> +static inline void define_long(IntelIOMMUState *s, hwaddr addr, uint32_t >> val, >> + uint32_t wmask, uint32_t w1cmask) >>
Re: [Qemu-devel] [PATCH v3 4/5] intel-iommu: add Intel IOMMU emulation to q35 and add a machine option "iommu" as a switch
2014-08-14 19:12 GMT+08:00 Michael S. Tsirkin : > On Mon, Aug 11, 2014 at 03:05:01PM +0800, Le Tan wrote: >> Add Intel IOMMU emulation to q35 chipset and expose it to the guest. >> 1. Add a machine option. Users can use "-machine iommu=on|off" in the command >> line to enable/disable Intel IOMMU. The default is off. >> 2. Accroding to the machine option, q35 will initialize the Intel IOMMU and >> use pci_setup_iommu() to setup q35_host_dma_iommu() as the IOMMU function for >> the pci bus. >> 3. q35_host_dma_iommu() will return different address space according to the >> bus_num and devfn of the device. >> >> Signed-off-by: Le Tan >> --- >> hw/core/machine.c | 27 +--- >> hw/pci-host/q35.c | 64 >> +++ >> include/hw/boards.h | 1 + >> include/hw/pci-host/q35.h | 2 ++ >> qemu-options.hx | 5 +++- >> vl.c | 4 +++ >> 6 files changed, 94 insertions(+), 9 deletions(-) >> >> diff --git a/hw/core/machine.c b/hw/core/machine.c >> index 7a66c57..f0046d6 100644 >> --- a/hw/core/machine.c >> +++ b/hw/core/machine.c >> @@ -235,6 +235,20 @@ static void machine_set_firmware(Object *obj, const >> char *value, Error **errp) >> ms->firmware = g_strdup(value); >> } >> >> +static bool machine_get_iommu(Object *obj, Error **errp) >> +{ >> +MachineState *ms = MACHINE(obj); >> + >> +return ms->iommu; >> +} >> + >> +static void machine_set_iommu(Object *obj, bool value, Error **errp) >> +{ >> +MachineState *ms = MACHINE(obj); >> + >> +ms->iommu = value; >> +} >> + >> static void machine_initfn(Object *obj) >> { >> object_property_add_str(obj, "accel", >> @@ -270,10 +284,17 @@ static void machine_initfn(Object *obj) >> machine_set_dump_guest_core, >> NULL); >> object_property_add_bool(obj, "mem-merge", >> - machine_get_mem_merge, machine_set_mem_merge, >> NULL); >> -object_property_add_bool(obj, "usb", machine_get_usb, machine_set_usb, >> NULL); >> + machine_get_mem_merge, >> + machine_set_mem_merge, NULL); >> +object_property_add_bool(obj, "usb", >> + machine_get_usb, >> + machine_set_usb, NULL); >> object_property_add_str(obj, "firmware", >> -machine_get_firmware, machine_set_firmware, >> NULL); >> +machine_get_firmware, >> +machine_set_firmware, NULL); >> +object_property_add_bool(obj, "iommu", >> + machine_get_iommu, >> + machine_set_iommu, NULL); >> } >> >> static void machine_finalize(Object *obj) >> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c >> index a0a3068..3342711 100644 >> --- a/hw/pci-host/q35.c >> +++ b/hw/pci-host/q35.c >> @@ -347,6 +347,53 @@ static void mch_reset(DeviceState *qdev) >> mch_update(mch); >> } >> >> +static AddressSpace *q35_host_dma_iommu(PCIBus *bus, void *opaque, int >> devfn) >> +{ >> +IntelIOMMUState *s = opaque; >> +VTDAddressSpace **pvtd_as; >> +VTDAddressSpace *vtd_as; >> +int bus_num = pci_bus_num(bus); >> + >> +assert(devfn >= 0); >> + >> +pvtd_as = s->address_spaces[bus_num]; >> +if (!pvtd_as) { >> +/* No corresponding free() */ >> +pvtd_as = g_malloc0(sizeof(VTDAddressSpace *) * >> +VTD_PCI_SLOT_MAX * VTD_PCI_FUNC_MAX); >> +s->address_spaces[bus_num] = pvtd_as; >> +} >> + >> +vtd_as = *(pvtd_as + devfn); > > pvtd_as[devfn] is cleaner. > In fact, you can then drop vtd_as local variable, use pvtd_as[devfn] > directly. > >> +if (!vtd_as) { >> +vtd_as = g_malloc0(sizeof(*vtd_as)); >> +*(pvtd_as + devfn) = vtd_as; >> + >> +vtd_as->bus_num = bus_num; >> +vtd_as->devfn = devfn; >> +vtd_as->iommu_state = s; >> +memory_region_init_iommu(&vtd_as->iommu, OBJECT(s), &s->iommu_ops, >> + "intel_iommu", UINT64_MAX); >> +address_space_
Re: [Qemu-devel] [PATCH v3 3/5] intel-iommu: add DMAR table to ACPI tables
Hi Michael, 2014-08-14 19:06 GMT+08:00 Michael S. Tsirkin : > On Mon, Aug 11, 2014 at 03:05:00PM +0800, Le Tan wrote: >> Expose Intel IOMMU to the BIOS. If object of TYPE_INTEL_IOMMU_DEVICE exists, >> add DMAR table to ACPI RSDT table. For now the DMAR table indicates that >> there >> is only one hardware unit without INTR_REMAP capability on the platform. >> >> Signed-off-by: Le Tan > > Could you add a unit test please? > >> --- >> hw/i386/acpi-build.c | 41 ++ >> hw/i386/acpi-defs.h | 70 >> >> 2 files changed, 111 insertions(+) >> >> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c >> index 816c6d9..595f501 100644 >> --- a/hw/i386/acpi-build.c >> +++ b/hw/i386/acpi-build.c >> @@ -47,6 +47,7 @@ >> #include "hw/i386/ich9.h" >> #include "hw/pci/pci_bus.h" >> #include "hw/pci-host/q35.h" >> +#include "hw/i386/intel_iommu.h" >> >> #include "hw/i386/q35-acpi-dsdt.hex" >> #include "hw/i386/acpi-dsdt.hex" >> @@ -1350,6 +1351,31 @@ build_mcfg_q35(GArray *table_data, GArray *linker, >> AcpiMcfgInfo *info) >> } >> >> static void >> +build_dmar_q35(GArray *table_data, GArray *linker) >> +{ >> +int dmar_start = table_data->len; >> + >> +AcpiTableDmar *dmar; >> +AcpiDmarHardwareUnit *drhd; >> + >> +dmar = acpi_data_push(table_data, sizeof(*dmar)); >> +dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1; >> +dmar->flags = 0;/* No intr_remap for now */ >> + >> +/* DMAR Remapping Hardware Unit Definition structure */ >> +drhd = acpi_data_push(table_data, sizeof(*drhd)); >> +drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT); >> +drhd->length = cpu_to_le16(sizeof(*drhd)); /* No device scope now */ >> +drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL; >> +drhd->pci_segment = cpu_to_le16(0); >> +drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR); >> + >> +build_header(linker, table_data, (void *)(table_data->data + >> dmar_start), >> + "DMAR", table_data->len - dmar_start, 1); >> +} >> + >> + >> +static void >> build_dsdt(GArray *table_data, GArray *linker, AcpiMiscInfo *misc) >> { >> AcpiTableHeader *dsdt; >> @@ -1470,6 +1496,17 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg) >> return true; >> } >> >> +static bool acpi_has_iommu(void) >> +{ >> +bool ambiguous; >> +Object *intel_iommu; >> + >> +intel_iommu = object_resolve_path_type("", TYPE_INTEL_IOMMU_DEVICE, >> + &ambiguous); >> +return intel_iommu && !ambiguous; >> +} >> + >> + >> static >> void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables) >> { >> @@ -1539,6 +1576,10 @@ void acpi_build(PcGuestInfo *guest_info, >> AcpiBuildTables *tables) >> acpi_add_table(table_offsets, tables->table_data); >> build_mcfg_q35(tables->table_data, tables->linker, &mcfg); >> } >> +if (acpi_has_iommu()) { >> +acpi_add_table(table_offsets, tables->table_data); >> +build_dmar_q35(tables->table_data, tables->linker); >> +} >> >> /* Add tables supplied by user (if any) */ >> for (u = acpi_table_first(); u; u = acpi_table_next(u)) { >> diff --git a/hw/i386/acpi-defs.h b/hw/i386/acpi-defs.h >> index e93babb..9674825 100644 >> --- a/hw/i386/acpi-defs.h >> +++ b/hw/i386/acpi-defs.h >> @@ -314,4 +314,74 @@ struct AcpiTableMcfg { >> } QEMU_PACKED; >> typedef struct AcpiTableMcfg AcpiTableMcfg; >> >> +/* DMAR - DMA Remapping table r2.2 */ >> +struct AcpiTableDmar { >> +ACPI_TABLE_HEADER_DEF >> +uint8_t host_address_width; /* Maximum DMA physical addressability */ >> +uint8_t flags; >> +uint8_t reserved[10]; >> +} QEMU_PACKED; >> +typedef struct AcpiTableDmar AcpiTableDmar; >> + >> +/* Masks for Flags field above */ >> +#define ACPI_DMAR_INTR_REMAP(1) >> +#define ACPI_DMAR_X2APIC_OPT_OUT(2) >> + >> +/* >> + * DMAR sub-structures (Follow DMA Remapping table) >> + */ >> +#define ACPI_DMAR_SUB_HEADER_DEF /* Common ACPI DMAR sub-structure header >> */\ >> +uint16_t type; \ >> +uint16_t length; > > R
Re: [Qemu-devel] [PATCH v3 4/5] intel-iommu: add Intel IOMMU emulation to q35 and add a machine option "iommu" as a switch
2014-08-14 19:35 GMT+08:00 Michael S. Tsirkin : > On Thu, Aug 14, 2014 at 07:33:29PM +0800, Le Tan wrote: >> 2014-08-14 19:12 GMT+08:00 Michael S. Tsirkin : >> > On Mon, Aug 11, 2014 at 03:05:01PM +0800, Le Tan wrote: >> >> Add Intel IOMMU emulation to q35 chipset and expose it to the guest. >> >> 1. Add a machine option. Users can use "-machine iommu=on|off" in the >> >> command >> >> line to enable/disable Intel IOMMU. The default is off. >> >> 2. Accroding to the machine option, q35 will initialize the Intel IOMMU >> >> and >> >> use pci_setup_iommu() to setup q35_host_dma_iommu() as the IOMMU function >> >> for >> >> the pci bus. >> >> 3. q35_host_dma_iommu() will return different address space according to >> >> the >> >> bus_num and devfn of the device. >> >> >> >> Signed-off-by: Le Tan >> >> --- >> >> hw/core/machine.c | 27 +--- >> >> hw/pci-host/q35.c | 64 >> >> +++ >> >> include/hw/boards.h | 1 + >> >> include/hw/pci-host/q35.h | 2 ++ >> >> qemu-options.hx | 5 +++- >> >> vl.c | 4 +++ >> >> 6 files changed, 94 insertions(+), 9 deletions(-) >> >> >> >> diff --git a/hw/core/machine.c b/hw/core/machine.c >> >> index 7a66c57..f0046d6 100644 >> >> --- a/hw/core/machine.c >> >> +++ b/hw/core/machine.c >> >> @@ -235,6 +235,20 @@ static void machine_set_firmware(Object *obj, const >> >> char *value, Error **errp) >> >> ms->firmware = g_strdup(value); >> >> } >> >> >> >> +static bool machine_get_iommu(Object *obj, Error **errp) >> >> +{ >> >> +MachineState *ms = MACHINE(obj); >> >> + >> >> +return ms->iommu; >> >> +} >> >> + >> >> +static void machine_set_iommu(Object *obj, bool value, Error **errp) >> >> +{ >> >> +MachineState *ms = MACHINE(obj); >> >> + >> >> +ms->iommu = value; >> >> +} >> >> + >> >> static void machine_initfn(Object *obj) >> >> { >> >> object_property_add_str(obj, "accel", >> >> @@ -270,10 +284,17 @@ static void machine_initfn(Object *obj) >> >> machine_set_dump_guest_core, >> >> NULL); >> >> object_property_add_bool(obj, "mem-merge", >> >> - machine_get_mem_merge, >> >> machine_set_mem_merge, NULL); >> >> -object_property_add_bool(obj, "usb", machine_get_usb, >> >> machine_set_usb, NULL); >> >> + machine_get_mem_merge, >> >> + machine_set_mem_merge, NULL); >> >> +object_property_add_bool(obj, "usb", >> >> + machine_get_usb, >> >> + machine_set_usb, NULL); >> >> object_property_add_str(obj, "firmware", >> >> -machine_get_firmware, machine_set_firmware, >> >> NULL); >> >> +machine_get_firmware, >> >> +machine_set_firmware, NULL); >> >> +object_property_add_bool(obj, "iommu", >> >> + machine_get_iommu, >> >> + machine_set_iommu, NULL); >> >> } >> >> >> >> static void machine_finalize(Object *obj) >> >> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c >> >> index a0a3068..3342711 100644 >> >> --- a/hw/pci-host/q35.c >> >> +++ b/hw/pci-host/q35.c >> >> @@ -347,6 +347,53 @@ static void mch_reset(DeviceState *qdev) >> >> mch_update(mch); >> >> } >> >> >> >> +static AddressSpace *q35_host_dma_iommu(PCIBus *bus, void *opaque, int >> >> devfn) >> >> +{ >> >> +IntelIOMMUState *s = opaque; >> >> +VTDAddressSpace **pvtd_as; >> >> +VTDAddressSpace *vtd_as; >> >> +int bus_num = pci_bus_num(bus); >> >> + >> >> +assert(devfn >= 0); >> >> + >> >> +pvtd_as = s-
Re: [Qemu-devel] [PATCH v3 3/5] intel-iommu: add DMAR table to ACPI tables
2014-08-14 19:51 GMT+08:00 Jan Kiszka : > On 2014-08-14 13:43, Michael S. Tsirkin wrote: >> On Thu, Aug 14, 2014 at 01:36:49PM +0200, Jan Kiszka wrote: >>> On 2014-08-14 13:06, Michael S. Tsirkin wrote: >>>> On Mon, Aug 11, 2014 at 03:05:00PM +0800, Le Tan wrote: >>>>> Expose Intel IOMMU to the BIOS. If object of TYPE_INTEL_IOMMU_DEVICE >>>>> exists, >>>>> add DMAR table to ACPI RSDT table. For now the DMAR table indicates that >>>>> there >>>>> is only one hardware unit without INTR_REMAP capability on the platform. >>>>> >>>>> Signed-off-by: Le Tan >>>> >>>> Could you add a unit test please? >>> >>> While unit tests would really be helpful, I'm afraid that's not in reach >>> (GSoC is almost over). The good news is that we have pretty broad test >>> coverage with both Linux and also Jailhouse already. >>> >>> Do you see unit tests as precondition for merging the series? >>> >>> Jan >>> >> >> Not a pre-requisite - it's very easy to add a unit test, >> just add a case in test_acpi_tcg. So I can do it myself afterwards. >> > > Ah, ok, that's tests/bios-tables-test.c. Maybe Le can have a look if > time is left. But hard pecils-down is already on the 18th. > > Jan > > OK, I will attempt to add that. I have just pushed the IOTLB cache. I am going to fix patches according to these reviews now. Le
Re: [Qemu-devel] [PATCH v3 0/5] intel-iommu: introduce Intel IOMMU (VT-d) emulation to q35 chipset
Hi Knut, 2014-08-15 19:15 GMT+08:00 Knut Omang : > On Fri, 2014-08-15 at 06:42 +0200, Knut Omang wrote: >> On Thu, 2014-08-14 at 14:10 +0200, Jan Kiszka wrote: >> > On 2014-08-14 13:15, Michael S. Tsirkin wrote: >> > > On Mon, Aug 11, 2014 at 03:04:57PM +0800, Le Tan wrote: >> > >> Hi, >> > >> >> > >> These patches are intended to introduce Intel IOMMU (VT-d) emulation to >> > >> q35 >> > >> chipset. The major job in these patches is to add support for emulating >> > >> Intel >> > >> IOMMU according to the VT-d specification, including basic responses to >> > >> CSRs >> > >> accesses, the logics of DMAR (DMA remapping) and DMA memory address >> > >> translations. >> > > >> > > Thanks! >> > > Looks very good overall, I noted some coding style issues - I didn't >> > > bother reporting each issue in every place where it appears - reported >> > > each issue once only, so please find and fix all instances of each >> > > issue. >> > >> > BTW, because I was in urgent need for virtual test environment for >> > Jailhouse, I hacked interrupt remapping on top of Le's patches: >> > >> > http://git.kiszka.org/?p=qemu.git;a=shortlog;h=refs/heads/queues/vtd-intremap >> > >> > The approach likely needs further discussions and refinements but it >> > already allows me to work on top with our hypervisor, and also Linux. >> > You can see from the last commit that Le's work made it pretty easy to >> > build this on top. >> >> Le, >> >> I have tried Jan's branch with my device setup which consists of a >> minimal q35 setup, an ioh3420 root port (specified as -device >> ioh3420,slot=0 ) and a pcie device plugged into that root port, which >> gives the following lscpi -t: >> >> -[:00]-+-00.0 >>+-01.0 >>+-02.0 >>+-03.0-[01]00.0 >>+-04.0 >>+-1f.0 >>+-1f.2 >>\-1f.3 >> >> All seems to work beautifully (I see the ISA bridge happily receive >> translations) until the first DMA from my device model (at 1:00.0) >> arrives, at which point I get: >> >> [ 1663.732413] dmar: DMAR:[DMA Write] Request device [00:03.0] fault addr >> fffa >> [ 1663.732413] DMAR:[fault reason 02] Present bit in context entry is clear >> >> I would have expected request device 01:00.0 for this. >> It is not clear to me yet if this is a weakness of the implementation of >> ioh3420 or the iommu. Just wanted to let you know right away in case you >> can shed some light to it or it is an easy fix, >> >> The device uses pci_dma_rw with itself as device pointer. > > To verify my hypothesis: with this rude hack my device now works much > better: > > @@ -774,6 +780,8 @@ static void iommu_translate(VTDAddressSpace *vtd_as, > int bus_num, int devfn, > is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; > } else { > ret_fr = dev_to_context_entry(s, bus_num, devfn, &ce); > +if (ret_fr) > +ret_fr = dev_to_context_entry(s, 1, 0, &ce); > is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; > if (ret_fr) { > ret_fr = -ret_fr; > > Looking at how things look on hardware, multiple devices often receive > overlapping DMA address ranges for different physical addresses. > > So if I understand the way this works, every requester ID would also > need to have it's own unique VTDAddressSpace, as each pci > device/function sees a unique DMA address space.. ioh3420 is a pcie-to-pcie bridge, right? In my opinion, each pci-e device behind the pcie-to-pcie bridge can be assigned individually. For now I added the VT-d to q35 by just adding it to the root pci bus. You can see here in q35.c: pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu); So if we add a pcie-to-pcie bridge, we may have to call the pci_setup_iommu() for that new bus. I don't know where to hook into this now. :) If you know the mechanism behind that, you can try to add that for the new bus. (I will dive into this after the clean up.) What do you think? Thanks very much for your testing! :) Le > Knut >
[Qemu-devel] [PATCH v4 2/8] intel-iommu: introduce Intel IOMMU (VT-d) emulation
Add support for emulating Intel IOMMU according to the VT-d specification for the q35 chipset machine. Implement the logics for DMAR (DMA remapping) without PASID support. The emulation supports register-based invalidation and primary fault logging. Signed-off-by: Le Tan --- hw/i386/Makefile.objs |1 + hw/i386/intel_iommu.c | 1257 hw/i386/intel_iommu_internal.h | 333 +++ include/hw/i386/intel_iommu.h | 89 +++ 4 files changed, 1680 insertions(+) create mode 100644 hw/i386/intel_iommu.c create mode 100644 hw/i386/intel_iommu_internal.h create mode 100644 include/hw/i386/intel_iommu.h diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs index 48014ab..6936111 100644 --- a/hw/i386/Makefile.objs +++ b/hw/i386/Makefile.objs @@ -2,6 +2,7 @@ obj-$(CONFIG_KVM) += kvm/ obj-y += multiboot.o smbios.o obj-y += pc.o pc_piix.o pc_q35.o obj-y += pc_sysfw.o +obj-y += intel_iommu.o obj-$(CONFIG_XEN) += ../xenpv/ xen/ obj-y += kvmvapic.o diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c new file mode 100644 index 000..8e67e04 --- /dev/null +++ b/hw/i386/intel_iommu.c @@ -0,0 +1,1257 @@ +/* + * QEMU emulation of an Intel IOMMU (VT-d) + * (DMA Remapping device) + * + * Copyright (C) 2013 Knut Omang, Oracle + * Copyright (C) 2014 Le Tan, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "hw/sysbus.h" +#include "exec/address-spaces.h" +#include "intel_iommu_internal.h" + +/*#define DEBUG_INTEL_IOMMU*/ +#ifdef DEBUG_INTEL_IOMMU +enum { +DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG, +}; +#define VTD_DBGBIT(x) (1 << DEBUG_##x) +static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR); + +#define VTD_DPRINTF(what, fmt, ...) do { \ +if (vtd_dbgflags & VTD_DBGBIT(what)) { \ +fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \ +## __VA_ARGS__); } \ +} while (0) +#else +#define VTD_DPRINTF(what, fmt, ...) do {} while (0) +#endif + +static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val, +uint64_t wmask, uint64_t w1cmask) +{ +stq_le_p(&s->csr[addr], val); +stq_le_p(&s->wmask[addr], wmask); +stq_le_p(&s->w1cmask[addr], w1cmask); +} + +static void vtd_define_quad_wo(IntelIOMMUState *s, hwaddr addr, uint64_t mask) +{ +stq_le_p(&s->womask[addr], mask); +} + +static void vtd_define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val, +uint32_t wmask, uint32_t w1cmask) +{ +stl_le_p(&s->csr[addr], val); +stl_le_p(&s->wmask[addr], wmask); +stl_le_p(&s->w1cmask[addr], w1cmask); +} + +static void vtd_define_long_wo(IntelIOMMUState *s, hwaddr addr, uint32_t mask) +{ +stl_le_p(&s->womask[addr], mask); +} + +/* "External" get/set operations */ +static void vtd_set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val) +{ +uint64_t oldval = ldq_le_p(&s->csr[addr]); +uint64_t wmask = ldq_le_p(&s->wmask[addr]); +uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]); +stq_le_p(&s->csr[addr], + ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val)); +} + +static void vtd_set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val) +{ +uint32_t oldval = ldl_le_p(&s->csr[addr]); +uint32_t wmask = ldl_le_p(&s->wmask[addr]); +uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]); +stl_le_p(&s->csr[addr], + ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val)); +} + +static uint64_t vtd_get_quad(IntelIOMMUState *s, hwaddr addr) +{ +uint64_t val = ldq_le_p(&s->csr[addr]); +uint64_t womask = ldq_le_p(&s->womask[addr]); +return val & ~womask; +} + +static uint32_t vtd_get_long(IntelIOMMUState *s, hwaddr addr) +{ +uint32_t val = ldl_le_p(&s->csr[addr]); +uint32_t womask = ldl_le_p(&s->womask[addr]); +return val & ~womask; +} + +/* "Internal" get/set operations */ +static uint64_t vtd_get_quad_raw(IntelIOMMUState *s, hwaddr addr) +{ +return ldq_le_p(&s->csr[addr]); +} + +static uint32_t vtd_get_long_r
[Qemu-devel] [PATCH v4 0/8] intel-iommu: introduce Intel IOMMU (VT-d) emulation to q35 chipset
Hi, These patches are intended to introduce Intel IOMMU (VT-d) emulation to q35 chipset. The major job in these patches is to add support for emulating Intel IOMMU according to the VT-d specification, including basic responses to CSRs accesses, the logics of DMAR (DMA remapping) and DMA memory address translations. Features implemented for now are: 1. Response to important CSRs accesses; 2. DMAR (DMA remapping) without PASID support; 3. Primary fault logging; 4. Support both register-based and queued invalidation for IOTLB and context cache invalidation; 5. Add DMAR table to ACPI tables to expose VT-d to BIOS; 6. Add "-machine iommu=on|off" option to enable/disable VT-d; 7. Only one DMAR unit for all the devices of PCI Segment 0; 8. Context-cache and IOTLB. Testing: 1. L1 guest with Linux with intel_iommu=on can interact with VT-d and boot smoothly, and there exists information about VT-d in the log of kernel; 2. Run L1 with VT-d, L2 guest with Linux can boot smoothly withou PCI device passthrough; 3. Run L1 with VT-d and "-soundhw ac97 (QEMU_AUDIO_DRV=alsa)", then assign the sound card to L2; L2 can boot smoothly with legacy PCI assignment and I can hear the music played in L2 from the host speakers; 4. Jailhouse hypervisor can run smoothly (tested by Jan). 5. Run L1 with VT-d and e1000 network card, then assign e1000 to L2; L2 will be STUCK when booting. This still remains unsolved now. As far as I know, I suppose that the L2 crashes when doing e1000_probe(). The QEMU of L1 will dump something with "KVM: entry failed, hardware error 0x0", and the KVM of host will print "nested_vmx_exit_handled failed vm entry 7". Unlike assigning the sound card, after being assigned to L2, there is no translation entry of e1000 through VT-d, which I think means that e1000 doesn't issue any DMA access during the boot of L2. Sometimes the kernel of L2 will print "divide error" during booting. Maybe it results from the lack of reset mechanism. 6. VFIO is tested and is similar to legacy pci assignment. TODO: 1. Fix the bug of legacy PCI assignment; 2. Add unit test for DMAR ACPI table; 3. Add support for PCIE-to-PCIE bridge. Changes since v3: *address reviewing suggestions given by Jan and Michael -implement Context-cache and IOTLB -remove 'inline' keyword from most functions -rename all the functions with prefix vtd_ -clean up constant definitions Changes since v2: *address reviewing suggestions given by Jan -add support for primary fault logging -add support for queued invalidation Changes since v1: *address reviewing suggestions given by Michael, Paolo, Stefan and Jan -split intel_iommu.h to include/hw/i386/intel_iommu.h and hw/i386/intel_iommu_internal.h -change the copyright information -change D() to VTD_DPRINTF() -remove dead code -rename constant definitions with consistent prefix VTD_ -rename some struct definitions according to QEMU standard -rename some CSRs access functions -use endian-save functions to access CSRs -change machine option to "iommu=on|off" Thanks very much! Git trees: https://github.com/tamlok/qemu Le Tan (8): iommu: add is_write as a parameter to the translate function of MemoryRegionIOMMUOps intel-iommu: introduce Intel IOMMU (VT-d) emulation intel-iommu: add DMAR table to ACPI tables intel-iommu: add Intel IOMMU emulation to q35 and add a machine option "iommu" as a switch intel-iommu: fix coding style issues around in q35.c and machine.c intel-iommu: add supports for queued invalidation interface intel-iommu: add context-cache to cache context-entry intel-iommu: add IOTLB using hash table exec.c |2 +- hw/alpha/typhoon.c |3 +- hw/core/machine.c | 27 +- hw/i386/Makefile.objs |1 + hw/i386/acpi-build.c | 39 + hw/i386/acpi-defs.h| 40 + hw/i386/intel_iommu.c | 1963 hw/i386/intel_iommu_internal.h | 389 hw/pci-host/apb.c |3 +- hw/pci-host/q35.c | 58 +- hw/ppc/spapr_iommu.c |3 +- include/exec/memory.h |2 +- include/hw/boards.h|1 + include/hw/i386/intel_iommu.h | 120 +++ include/hw/pci-host/q35.h |2 + qemu-options.hx|5 +- vl.c |4 + 17 files changed, 2648 insertions(+), 14 deletions(-) create mode 100644 hw/i386/intel_iommu.c create mode 100644 hw/i386/intel_iommu_internal.h create mode 100644 include/hw/i386/intel_iommu.h -- 1.9.1
[Qemu-devel] [PATCH v4 1/8] iommu: add is_write as a parameter to the translate function of MemoryRegionIOMMUOps
Add a bool variable is_write as a parameter to the translate function of MemoryRegionIOMMUOps to indicate the operation of the access. It can be used for correct fault reporting from within the callback. Change the interface of related functions. Signed-off-by: Le Tan --- exec.c| 2 +- hw/alpha/typhoon.c| 3 ++- hw/pci-host/apb.c | 3 ++- hw/ppc/spapr_iommu.c | 3 ++- include/exec/memory.h | 2 +- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/exec.c b/exec.c index 765bd94..5ccc106 100644 --- a/exec.c +++ b/exec.c @@ -373,7 +373,7 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr, break; } -iotlb = mr->iommu_ops->translate(mr, addr); +iotlb = mr->iommu_ops->translate(mr, addr, is_write); addr = ((iotlb.translated_addr & ~iotlb.addr_mask) | (addr & iotlb.addr_mask)); len = MIN(len, (addr | iotlb.addr_mask) - addr + 1); diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c index 67a1070..31947d9 100644 --- a/hw/alpha/typhoon.c +++ b/hw/alpha/typhoon.c @@ -660,7 +660,8 @@ static bool window_translate(TyphoonWindow *win, hwaddr addr, /* Handle PCI-to-system address translation. */ /* TODO: A translation failure here ought to set PCI error codes on the Pchip and generate a machine check interrupt. */ -static IOMMUTLBEntry typhoon_translate_iommu(MemoryRegion *iommu, hwaddr addr) +static IOMMUTLBEntry typhoon_translate_iommu(MemoryRegion *iommu, hwaddr addr, + bool is_write) { TyphoonPchip *pchip = container_of(iommu, TyphoonPchip, iommu); IOMMUTLBEntry ret; diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c index d238a84..0e0e0ee 100644 --- a/hw/pci-host/apb.c +++ b/hw/pci-host/apb.c @@ -203,7 +203,8 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &is->iommu_as; } -static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr) +static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr, + bool is_write) { IOMMUState *is = container_of(iommu, IOMMUState, iommu); hwaddr baseaddr, offset; diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index f6e32a4..6c91d8e 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -59,7 +59,8 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn) return NULL; } -static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr) +static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr, + bool is_write) { sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu); uint64_t tce; diff --git a/include/exec/memory.h b/include/exec/memory.h index e2c8e3e..834543d 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -129,7 +129,7 @@ typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps; struct MemoryRegionIOMMUOps { /* Return a TLB entry that contains a given address. */ -IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr); +IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is_write); }; typedef struct CoalescedMemoryRange CoalescedMemoryRange; -- 1.9.1
[Qemu-devel] [PATCH v4 4/8] intel-iommu: add Intel IOMMU emulation to q35 and add a machine option "iommu" as a switch
Add Intel IOMMU emulation to q35 chipset and expose it to the guest. 1. Add a machine option. Users can use "-machine iommu=on|off" in the command line to enable/disable Intel IOMMU. The default is off. 2. Accroding to the machine option, q35 will initialize the Intel IOMMU and use pci_setup_iommu() to setup q35_host_dma_iommu() as the IOMMU function for the pci bus. 3. q35_host_dma_iommu() will return different address space according to the bus_num and devfn of the device. Signed-off-by: Le Tan --- hw/core/machine.c | 17 + hw/pci-host/q35.c | 46 ++ include/hw/boards.h | 1 + include/hw/pci-host/q35.h | 2 ++ qemu-options.hx | 5 - vl.c | 4 6 files changed, 74 insertions(+), 1 deletion(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index 7a66c57..0708de5 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -235,6 +235,20 @@ static void machine_set_firmware(Object *obj, const char *value, Error **errp) ms->firmware = g_strdup(value); } +static bool machine_get_iommu(Object *obj, Error **errp) +{ +MachineState *ms = MACHINE(obj); + +return ms->iommu; +} + +static void machine_set_iommu(Object *obj, bool value, Error **errp) +{ +MachineState *ms = MACHINE(obj); + +ms->iommu = value; +} + static void machine_initfn(Object *obj) { object_property_add_str(obj, "accel", @@ -274,6 +288,9 @@ static void machine_initfn(Object *obj) object_property_add_bool(obj, "usb", machine_get_usb, machine_set_usb, NULL); object_property_add_str(obj, "firmware", machine_get_firmware, machine_set_firmware, NULL); +object_property_add_bool(obj, "iommu", + machine_get_iommu, + machine_set_iommu, NULL); } static void machine_finalize(Object *obj) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index a0a3068..ad8f1b9 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -347,6 +347,48 @@ static void mch_reset(DeviceState *qdev) mch_update(mch); } +static AddressSpace *q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) +{ +IntelIOMMUState *s = opaque; +VTDAddressSpace **pvtd_as; +int bus_num = pci_bus_num(bus); + +assert(0 <= bus_num && bus_num <= VTD_PCI_BUS_MAX); +assert(0 <= devfn && devfn <= VTD_PCI_DEVFN_MAX); + +pvtd_as = s->address_spaces[bus_num]; +if (!pvtd_as) { +/* No corresponding free() */ +pvtd_as = g_malloc0(sizeof(VTDAddressSpace *) * VTD_PCI_DEVFN_MAX); +s->address_spaces[bus_num] = pvtd_as; +} +if (!pvtd_as[devfn]) { +pvtd_as[devfn] = g_malloc0(sizeof(VTDAddressSpace)); + +pvtd_as[devfn]->bus_num = (uint8_t)bus_num; +pvtd_as[devfn]->devfn = (uint8_t)devfn; +pvtd_as[devfn]->iommu_state = s; +memory_region_init_iommu(&pvtd_as[devfn]->iommu, OBJECT(s), + &s->iommu_ops, "intel_iommu", UINT64_MAX); +address_space_init(&pvtd_as[devfn]->as, + &pvtd_as[devfn]->iommu, "intel_iommu"); +} +return &pvtd_as[devfn]->as; +} + +static void mch_init_dmar(MCHPCIState *mch) +{ +PCIBus *pci_bus = PCI_BUS(qdev_get_parent_bus(DEVICE(mch))); + +mch->iommu = INTEL_IOMMU_DEVICE(qdev_create(NULL, TYPE_INTEL_IOMMU_DEVICE)); +object_property_add_child(OBJECT(mch), "intel-iommu", + OBJECT(mch->iommu), NULL); +qdev_init_nofail(DEVICE(mch->iommu)); +sysbus_mmio_map(SYS_BUS_DEVICE(mch->iommu), 0, Q35_HOST_BRIDGE_IOMMU_ADDR); + +pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu); +} + static int mch_init(PCIDevice *d) { int i; @@ -370,6 +412,10 @@ static int mch_init(PCIDevice *d) &mch->pam_regions[i+1], PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); } +/* Intel IOMMU (VT-d) */ +if (qemu_opt_get_bool(qemu_get_machine_opts(), "iommu", false)) { +mch_init_dmar(mch); +} return 0; } diff --git a/include/hw/boards.h b/include/hw/boards.h index 605a970..dfb6718 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -123,6 +123,7 @@ struct MachineState { bool mem_merge; bool usb; char *firmware; +bool iommu; ram_addr_t ram_size; ram_addr_t maxram_size; diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index d9ee978..025d6e6 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -33,6 +33,7 @@ #include "hw/acpi/acpi.h" #include "hw/acpi/ich9.h" #include "hw/pci-host/pam.h" +#include "hw/i386/intel_iommu.h" #define TYPE_Q35_HOST_DEVICE
[Qemu-devel] [PATCH v4 3/8] intel-iommu: add DMAR table to ACPI tables
Expose Intel IOMMU to the BIOS. If object of TYPE_INTEL_IOMMU_DEVICE exists, add DMAR table to ACPI RSDT table. For now the DMAR table indicates that there is only one hardware unit without INTR_REMAP capability on the platform. Signed-off-by: Le Tan --- hw/i386/acpi-build.c | 39 +++ hw/i386/acpi-defs.h | 40 2 files changed, 79 insertions(+) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 816c6d9..ac56a63 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -47,6 +47,7 @@ #include "hw/i386/ich9.h" #include "hw/pci/pci_bus.h" #include "hw/pci-host/q35.h" +#include "hw/i386/intel_iommu.h" #include "hw/i386/q35-acpi-dsdt.hex" #include "hw/i386/acpi-dsdt.hex" @@ -1350,6 +1351,30 @@ build_mcfg_q35(GArray *table_data, GArray *linker, AcpiMcfgInfo *info) } static void +build_dmar_q35(GArray *table_data, GArray *linker) +{ +int dmar_start = table_data->len; + +AcpiTableDmar *dmar; +AcpiDmarHardwareUnit *drhd; + +dmar = acpi_data_push(table_data, sizeof(*dmar)); +dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1; +dmar->flags = 0;/* No intr_remap for now */ + +/* DMAR Remapping Hardware Unit Definition structure */ +drhd = acpi_data_push(table_data, sizeof(*drhd)); +drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT); +drhd->length = cpu_to_le16(sizeof(*drhd)); /* No device scope now */ +drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL; +drhd->pci_segment = cpu_to_le16(0); +drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR); + +build_header(linker, table_data, (void *)(table_data->data + dmar_start), + "DMAR", table_data->len - dmar_start, 1); +} + +static void build_dsdt(GArray *table_data, GArray *linker, AcpiMiscInfo *misc) { AcpiTableHeader *dsdt; @@ -1470,6 +1495,16 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg) return true; } +static bool acpi_has_iommu(void) +{ +bool ambiguous; +Object *intel_iommu; + +intel_iommu = object_resolve_path_type("", TYPE_INTEL_IOMMU_DEVICE, + &ambiguous); +return intel_iommu && !ambiguous; +} + static void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables) { @@ -1539,6 +1574,10 @@ void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables) acpi_add_table(table_offsets, tables->table_data); build_mcfg_q35(tables->table_data, tables->linker, &mcfg); } +if (acpi_has_iommu()) { +acpi_add_table(table_offsets, tables->table_data); +build_dmar_q35(tables->table_data, tables->linker); +} /* Add tables supplied by user (if any) */ for (u = acpi_table_first(); u; u = acpi_table_next(u)) { diff --git a/hw/i386/acpi-defs.h b/hw/i386/acpi-defs.h index e93babb..93f424b 100644 --- a/hw/i386/acpi-defs.h +++ b/hw/i386/acpi-defs.h @@ -314,4 +314,44 @@ struct AcpiTableMcfg { } QEMU_PACKED; typedef struct AcpiTableMcfg AcpiTableMcfg; +/* DMAR - DMA Remapping table r2.2 */ +struct AcpiTableDmar { +ACPI_TABLE_HEADER_DEF +uint8_t host_address_width; /* Maximum DMA physical addressability */ +uint8_t flags; +uint8_t reserved[10]; +} QEMU_PACKED; +typedef struct AcpiTableDmar AcpiTableDmar; + +/* Masks for Flags field above */ +#define ACPI_DMAR_INTR_REMAP1 +#define ACPI_DMAR_X2APIC_OPT_OUT(1 << 1) + +/* Values for sub-structure type for DMAR */ +enum { +ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, /* DRHD */ +ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, /* RMRR */ +ACPI_DMAR_TYPE_ATSR = 2,/* ATSR */ +ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3, /* RHSR */ +ACPI_DMAR_TYPE_ANDD = 4,/* ANDD */ +ACPI_DMAR_TYPE_RESERVED = 5 /* Reserved for furture use */ +}; + +/* + * Sub-structures for DMAR + */ +/* Type 0: Hardware Unit Definition */ +struct AcpiDmarHardwareUnit { +uint16_t type; +uint16_t length; +uint8_t flags; +uint8_t reserved; +uint16_t pci_segment; /* The PCI Segment associated with this unit */ +uint64_t address; /* Base address of remapping hardware register-set */ +} QEMU_PACKED; +typedef struct AcpiDmarHardwareUnit AcpiDmarHardwareUnit; + +/* Masks for Flags field above */ +#define ACPI_DMAR_INCLUDE_PCI_ALL 1 + #endif -- 1.9.1
[Qemu-devel] [PATCH v4 8/8] intel-iommu: add IOTLB using hash table
Add IOTLB to cache information about the translation of input-addresses. IOTLB use a GHashTable as cache. The key of the hash table is the logical-OR of gfn and source id after left-shifting. Signed-off-by: Le Tan --- hw/i386/intel_iommu.c | 213 - hw/i386/intel_iommu_internal.h | 34 ++- include/hw/i386/intel_iommu.h | 11 ++- 3 files changed, 251 insertions(+), 7 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index c514310..0a4282a 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -132,6 +132,35 @@ static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr, return new_val; } +/* GHashTable functions */ +static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2) +{ +return *((const uint64_t *)v1) == *((const uint64_t *)v2); +} + +static guint vtd_uint64_hash(gconstpointer v) +{ +return (guint)*(const uint64_t *)v; +} + +static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value, + gpointer user_data) +{ +VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value; +uint16_t domain_id = *(uint16_t *)user_data; +return entry->domain_id == domain_id; +} + +static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value, +gpointer user_data) +{ +VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value; +VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data; +uint64_t gfn = info->gfn & info->mask; +return (entry->domain_id == info->domain_id) && +((entry->gfn & info->mask) == gfn); +} + /* Reset all the gen of VTDAddressSpace to zero and set the gen of * IntelIOMMUState to 1. */ @@ -159,6 +188,48 @@ static void vtd_reset_context_cache(IntelIOMMUState *s) s->context_cache_gen = 1; } +static void vtd_reset_iotlb(IntelIOMMUState *s) +{ +assert(s->iotlb); +g_hash_table_remove_all(s->iotlb); +} + +static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id, + hwaddr addr) +{ +uint64_t key; + +key = (addr >> VTD_PAGE_SHIFT_4K) | + ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT); +return g_hash_table_lookup(s->iotlb, &key); + +} + +static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, + uint16_t domain_id, hwaddr addr, uint64_t slpte, + bool read_flags, bool write_flags) +{ +VTDIOTLBEntry *entry = g_malloc(sizeof(*entry)); +uint64_t *key = g_malloc(sizeof(*key)); +uint64_t gfn = addr >> VTD_PAGE_SHIFT_4K; + +VTD_DPRINTF(CACHE, "update iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64 +" slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr, slpte, +domain_id); +if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) { +VTD_DPRINTF(CACHE, "iotlb exceeds size limit, forced to reset"); +vtd_reset_iotlb(s); +} + +entry->gfn = gfn; +entry->domain_id = domain_id; +entry->slpte = slpte; +entry->read_flags = read_flags; +entry->write_flags = write_flags; +*key = gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT); +g_hash_table_replace(s->iotlb, key, entry); +} + /* Given the reg addr of both the message data and address, generate an * interrupt via MSI. */ @@ -693,6 +764,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, uint8_t bus_num, bool is_fpd_set = false; bool reads = true; bool writes = true; +VTDIOTLBEntry *iotlb_entry; /* Check if the request is in interrupt address range */ if (vtd_is_interrupt_addr(addr)) { @@ -716,6 +788,17 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, uint8_t bus_num, return; } } +/* Try to fetch slpte form IOTLB */ +iotlb_entry = vtd_lookup_iotlb(s, source_id, addr); +if (iotlb_entry) { +VTD_DPRINTF(CACHE, "hit iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64 +" slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr, +iotlb_entry->slpte, iotlb_entry->domain_id); +slpte = iotlb_entry->slpte; +reads = iotlb_entry->read_flags; +writes = iotlb_entry->write_flags; +goto out; +} /* Try to fetch context-entry from cache first */ if (cc_entry->context_cache_gen == s->context_cache_gen) { VTD_DPRINTF(CACHE, "hit context-cache bus %d devfn %d " @@ -760,6 +843,9 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, uint8_t bus_num, return; } +vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte, +
[Qemu-devel] [PATCH v4 5/8] intel-iommu: fix coding style issues around in q35.c and machine.c
Fix coding style issues around in hw/pci-host/q35.c and hw/core/machine.c. Signed-off-by: Le Tan --- hw/core/machine.c | 10 +++--- hw/pci-host/q35.c | 11 ++- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index 0708de5..f0046d6 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -284,10 +284,14 @@ static void machine_initfn(Object *obj) machine_set_dump_guest_core, NULL); object_property_add_bool(obj, "mem-merge", - machine_get_mem_merge, machine_set_mem_merge, NULL); -object_property_add_bool(obj, "usb", machine_get_usb, machine_set_usb, NULL); + machine_get_mem_merge, + machine_set_mem_merge, NULL); +object_property_add_bool(obj, "usb", + machine_get_usb, + machine_set_usb, NULL); object_property_add_str(obj, "firmware", -machine_get_firmware, machine_set_firmware, NULL); +machine_get_firmware, +machine_set_firmware, NULL); object_property_add_bool(obj, "iommu", machine_get_iommu, machine_set_iommu, NULL); diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index ad8f1b9..5618ad2 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -405,12 +405,13 @@ static int mch_init(PCIDevice *d) memory_region_add_subregion_overlap(mch->system_memory, 0xa, &mch->smram_region, 1); memory_region_set_enabled(&mch->smram_region, false); -init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory, mch->pci_address_space, - &mch->pam_regions[0], PAM_BIOS_BASE, PAM_BIOS_SIZE); +init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory, + mch->pci_address_space, &mch->pam_regions[0], + PAM_BIOS_BASE, PAM_BIOS_SIZE); for (i = 0; i < 12; ++i) { -init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory, mch->pci_address_space, - &mch->pam_regions[i+1], PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, - PAM_EXPAN_SIZE); +init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory, + mch->pci_address_space, &mch->pam_regions[i+1], + PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); } /* Intel IOMMU (VT-d) */ if (qemu_opt_get_bool(qemu_get_machine_opts(), "iommu", false)) { -- 1.9.1
[Qemu-devel] [PATCH v4 7/8] intel-iommu: add context-cache to cache context-entry
Add context-cache to cache context-entry encountered on a page-walk. Each VTDAddressSpace has a member of VTDContextCacheEntry which represents an entry in the context-cache. Since devices with different bus_num and devfn have their respective VTDAddressSpace, this will be a good way to reference the cached entries. Each VTDContextCacheEntry will have a context_cache_gen and the cached entry is valid only when context_cache_gen equals IntelIOMMUState.context_cache_gen. Signed-off-by: Le Tan --- hw/i386/intel_iommu.c | 188 +++-- hw/i386/intel_iommu_internal.h | 23 +++-- hw/pci-host/q35.c | 1 + include/hw/i386/intel_iommu.h | 22 + 4 files changed, 199 insertions(+), 35 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 60dec4f..c514310 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -27,6 +27,7 @@ #ifdef DEBUG_INTEL_IOMMU enum { DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG, +DEBUG_CACHE, }; #define VTD_DBGBIT(x) (1 << DEBUG_##x) static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR); @@ -131,6 +132,33 @@ static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr, return new_val; } +/* Reset all the gen of VTDAddressSpace to zero and set the gen of + * IntelIOMMUState to 1. + */ +static void vtd_reset_context_cache(IntelIOMMUState *s) +{ +VTDAddressSpace **pvtd_as; +VTDAddressSpace *vtd_as; +uint32_t bus_it; +uint32_t devfn_it; + +VTD_DPRINTF(CACHE, "global context_cache_gen=1"); +for (bus_it = 0; bus_it < VTD_PCI_BUS_MAX; ++bus_it) { +pvtd_as = s->address_spaces[bus_it]; +if (!pvtd_as) { +continue; +} +for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) { +vtd_as = pvtd_as[devfn_it]; +if (!vtd_as) { +continue; +} +vtd_as->context_cache_entry.context_cache_gen = 0; +} +} +s->context_cache_gen = 1; +} + /* Given the reg addr of both the message data and address, generate an * interrupt via MSI. */ @@ -651,11 +679,13 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr) * @is_write: The access is a write operation * @entry: IOMMUTLBEntry that contain the addr to be translated and result */ -static void vtd_do_iommu_translate(IntelIOMMUState *s, uint8_t bus_num, +static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, uint8_t bus_num, uint8_t devfn, hwaddr addr, bool is_write, IOMMUTLBEntry *entry) { +IntelIOMMUState *s = vtd_as->iommu_state; VTDContextEntry ce; +VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry; uint64_t slpte; uint32_t level; uint16_t source_id = vtd_make_source_id(bus_num, devfn); @@ -686,18 +716,35 @@ static void vtd_do_iommu_translate(IntelIOMMUState *s, uint8_t bus_num, return; } } - -ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce); -is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; -if (ret_fr) { -ret_fr = -ret_fr; -if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { -VTD_DPRINTF(FLOG, "fault processing is disabled for DMA requests " -"through this context-entry (with FPD Set)"); -} else { -vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); +/* Try to fetch context-entry from cache first */ +if (cc_entry->context_cache_gen == s->context_cache_gen) { +VTD_DPRINTF(CACHE, "hit context-cache bus %d devfn %d " +"(hi %"PRIx64 " lo %"PRIx64 " gen %"PRIu32 ")", +bus_num, devfn, cc_entry->context_entry.hi, +cc_entry->context_entry.lo, cc_entry->context_cache_gen); +ce = cc_entry->context_entry; +is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; +} else { +ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce); +is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; +if (ret_fr) { +ret_fr = -ret_fr; +if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { +VTD_DPRINTF(FLOG, "fault processing is disabled for DMA " +"requests through this context-entry " +"(with FPD Set)"); +} else { +vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); +} +return; } -return; +/* Update context-cache */ +VTD_DPRINTF(CACHE, "update context-cache bus %d devfn %d " +"(hi %"PRIx64 " lo %"PRIx64 " gen
[Qemu-devel] [PATCH v4 6/8] intel-iommu: add supports for queued invalidation interface
Add supports for queued invalidation interface, an expended invalidation interface with extended capabilities. Signed-off-by: Le Tan --- hw/i386/intel_iommu.c | 373 - hw/i386/intel_iommu_internal.h | 27 ++- 2 files changed, 393 insertions(+), 7 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 8e67e04..60dec4f 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -314,6 +314,41 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, } } +/* Handle Invalidation Queue Errors of queued invalidation interface error + * conditions. + */ +static void vtd_handle_inv_queue_error(IntelIOMMUState *s) +{ +uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG); + +vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_IQE); +vtd_generate_fault_event(s, fsts_reg); +} + +/* Set the IWC field and try to generate an invalidation completion interrupt */ +static void vtd_generate_completion_event(IntelIOMMUState *s) +{ +VTD_DPRINTF(INV, "completes an invalidation wait command with " +"Interrupt Flag"); +if (vtd_get_long_raw(s, DMAR_ICS_REG) & VTD_ICS_IWC) { +VTD_DPRINTF(INV, "there is a previous interrupt condition to be " +"serviced by software, " +"new invalidation event is not generated"); +return; +} +vtd_set_clear_mask_long(s, DMAR_ICS_REG, 0, VTD_ICS_IWC); +vtd_set_clear_mask_long(s, DMAR_IECTL_REG, 0, VTD_IECTL_IP); +if (vtd_get_long_raw(s, DMAR_IECTL_REG) & VTD_IECTL_IM) { +VTD_DPRINTF(INV, "IM filed in IECTL_REG is set, new invalidation " +"event is not generated"); +return; +} else { +/* Generate the interrupt event */ +vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG); +vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0); +} +} + static inline bool vtd_root_entry_present(VTDRootEntry *root) { return root->val & VTD_ROOT_ENTRY_P; @@ -759,6 +794,54 @@ static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val) return iaig; } +static inline bool vtd_queued_inv_enable_check(IntelIOMMUState *s) +{ +return s->iq_tail == 0; +} + +static inline bool vtd_queued_inv_disable_check(IntelIOMMUState *s) +{ +return s->qi_enabled && (s->iq_tail == s->iq_head) && + (s->iq_last_desc_type == VTD_INV_DESC_WAIT); +} + +static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en) +{ +uint64_t iqa_val = vtd_get_quad_raw(s, DMAR_IQA_REG); + +VTD_DPRINTF(INV, "Queued Invalidation Enable %s", (en ? "on" : "off")); +if (en) { +if (vtd_queued_inv_enable_check(s)) { +s->iq = iqa_val & VTD_IQA_IQA_MASK; +/* 2^(x+8) entries */ +s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8); +s->qi_enabled = true; +VTD_DPRINTF(INV, "DMAR_IQA_REG 0x%"PRIx64, iqa_val); +VTD_DPRINTF(INV, "Invalidation Queue addr 0x%"PRIx64 " size %d", +s->iq, s->iq_size); +/* Ok - report back to driver */ +vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES); +} else { +VTD_DPRINTF(GENERAL, "error: can't enable Queued Invalidation: " +"tail %"PRIu16, s->iq_tail); +} +} else { +if (vtd_queued_inv_disable_check(s)) { +/* disable Queued Invalidation */ +vtd_set_quad_raw(s, DMAR_IQH_REG, 0); +s->iq_head = 0; +s->qi_enabled = false; +/* Ok - report back to driver */ +vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_QIES, 0); +} else { +VTD_DPRINTF(GENERAL, "error: can't disable Queued Invalidation: " +"head %"PRIu16 ", tail %"PRIu16 +", last_descriptor %"PRIu8, +s->iq_head, s->iq_tail, s->iq_last_desc_type); +} +} +} + /* Set Root Table Pointer */ static void vtd_handle_gcmd_srtp(IntelIOMMUState *s) { @@ -804,6 +887,10 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s) /* Set/update the root-table pointer */ vtd_handle_gcmd_srtp(s); } +if (changed & VTD_GCMD_QIE) { +/* Queued Invalidation Enable */ +vtd_handle_gcmd_qie(s, val & VTD_GCMD_QIE); +} } /* Handle write to Context Command Register */ @@ -814,6 +901,11 @@ static void vtd_handle_ccmd_write(IntelIOMMUState *s) /* Context-cache invalidation request */ if (val & VTD_CCMD_ICC) { +if (s-
Re: [Qemu-devel] [PATCH v4 0/8] intel-iommu: introduce Intel IOMMU (VT-d) emulation to q35 chipset
Hi, 2014-08-29 5:12 GMT+08:00 Michael S. Tsirkin : > On Sat, Aug 16, 2014 at 01:55:36PM +0800, Le Tan wrote: >> Hi, >> >> These patches are intended to introduce Intel IOMMU (VT-d) emulation to q35 >> chipset. The major job in these patches is to add support for emulating Intel >> IOMMU according to the VT-d specification, including basic responses to CSRs >> accesses, the logics of DMAR (DMA remapping) and DMA memory address >> translations. > > Thanks, I applied this in my tree. > Will send upstream in the next pull request. > I hope this happened soon enough for you to meet your timing > requirements? I am sorry that I am inconvenient to access the Internet these days, so I can't response timely. There are no timing requirements for GSoC. :) Thanks very much! Le > >> Features implemented for now are: >> 1. Response to important CSRs accesses; >> 2. DMAR (DMA remapping) without PASID support; >> 3. Primary fault logging; >> 4. Support both register-based and queued invalidation for IOTLB and context >>cache invalidation; >> 5. Add DMAR table to ACPI tables to expose VT-d to BIOS; >> 6. Add "-machine iommu=on|off" option to enable/disable VT-d; >> 7. Only one DMAR unit for all the devices of PCI Segment 0; >> 8. Context-cache and IOTLB. >> >> Testing: >> 1. L1 guest with Linux with intel_iommu=on can interact with VT-d and boot >> smoothly, and there exists information about VT-d in the log of kernel; >> 2. Run L1 with VT-d, L2 guest with Linux can boot smoothly withou PCI device >> passthrough; >> 3. Run L1 with VT-d and "-soundhw ac97 (QEMU_AUDIO_DRV=alsa)", then assign >> the >> sound card to L2; L2 can boot smoothly with legacy PCI assignment and I can >> hear the music played in L2 from the host speakers; >> 4. Jailhouse hypervisor can run smoothly (tested by Jan). >> 5. Run L1 with VT-d and e1000 network card, then assign e1000 to L2; L2 will >> be >> STUCK when booting. This still remains unsolved now. As far as I know, I >> suppose >> that the L2 crashes when doing e1000_probe(). The QEMU of L1 will dump >> something with "KVM: entry failed, hardware error 0x0", and the KVM of host >> will print "nested_vmx_exit_handled failed vm entry 7". Unlike assigning the >> sound card, after being assigned to L2, there is no translation entry of >> e1000 >> through VT-d, which I think means that e1000 doesn't issue any DMA access >> during >> the boot of L2. Sometimes the kernel of L2 will print "divide error" during >> booting. Maybe it results from the lack of reset mechanism. >> 6. VFIO is tested and is similar to legacy pci assignment. >> >> TODO: >> 1. Fix the bug of legacy PCI assignment; >> 2. Add unit test for DMAR ACPI table; >> 3. Add support for PCIE-to-PCIE bridge. >> >> Changes since v3: >> *address reviewing suggestions given by Jan and Michael >> -implement Context-cache and IOTLB >> -remove 'inline' keyword from most functions >> -rename all the functions with prefix vtd_ >> -clean up constant definitions >> >> Changes since v2: >> *address reviewing suggestions given by Jan >> -add support for primary fault logging >> -add support for queued invalidation >> >> Changes since v1: >> *address reviewing suggestions given by Michael, Paolo, Stefan and Jan >> -split intel_iommu.h to include/hw/i386/intel_iommu.h and >> hw/i386/intel_iommu_internal.h >> -change the copyright information >> -change D() to VTD_DPRINTF() >> -remove dead code >> -rename constant definitions with consistent prefix VTD_ >> -rename some struct definitions according to QEMU standard >> -rename some CSRs access functions >> -use endian-save functions to access CSRs >> -change machine option to "iommu=on|off" >> >> Thanks very much! >> >> Git trees: >> https://github.com/tamlok/qemu >> >> Le Tan (8): >> iommu: add is_write as a parameter to the translate function of >> MemoryRegionIOMMUOps >> intel-iommu: introduce Intel IOMMU (VT-d) emulation >> intel-iommu: add DMAR table to ACPI tables >> intel-iommu: add Intel IOMMU emulation to q35 and add a machine option >> "iommu" as a switch >> intel-iommu: fix coding style issues around in q35.c and machine.c >> intel-iommu: add supports for queued invalidation interface >> intel-iommu: add context-cache to cache context-entry >> intel-iommu: add IOTLB using hash table >> >> exec.c
[Qemu-devel] [PATCH 0/4] replace some fprintf(stderr, ...) with error_report()
This series of patches replaces some more occurrences of fprintf(stderr, ...) with error_report() and removes the trailing "\n". Some of them are not changed because I am not sure if they should be changed. There are some inconsistency while dealing with macros that involves fprintf(stderr,...), that is, some of them remain the same while some are changed to error_report(). I have 43 more patches (most of them touch files in hw/) and would send them out once the first 4 patches are fine. Le Tan (4): arch_init: replace fprintf(stderr,...) with error_report() in arch_init.c audio: replace fprintf(stderr,...) with error_report() in audio block: replace fprintf(stderr,...) with error_report() in block/ bsd-user: replace fprintf(stderr,...) with error_report() arch_init.c| 36 --- audio/spiceaudio.c |2 +- audio/wavcapture.c |4 +- block-migration.c |4 +- block.c|4 +- block/linux-aio.c |4 +- block/nbd-client.h |2 +- block/qcow2-cluster.c |4 +- block/qcow2-refcount.c | 114 block/qcow2.c | 16 +++ block/quorum.c |4 +- block/raw-posix.c |8 ++-- block/raw-win32.c |6 +-- block/ssh.c|4 +- block/vdi.c| 14 +++--- block/vmdk.c | 21 - block/vpc.c|4 +- block/vvfat.c | 108 ++--- blockdev.c |6 +-- bsd-user/bsdload.c |2 +- bsd-user/elfload.c |2 +- bsd-user/main.c| 14 +++--- 22 files changed, 189 insertions(+), 194 deletions(-) -- 1.7.9.5
[Qemu-devel] [PATCH 2/4] audio: replace fprintf(stderr, ...) with error_report() in audio
Replace fprintf(stderr,...) with error_report() in files audio/*. The trailing "\n"s of the @fmt argument have been removed because @fmt of error_report() should not contain newline. Signed-off-by: Le Tan --- audio/spiceaudio.c |2 +- audio/wavcapture.c |4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/audio/spiceaudio.c b/audio/spiceaudio.c index fceee50..7b79bed 100644 --- a/audio/spiceaudio.c +++ b/audio/spiceaudio.c @@ -105,7 +105,7 @@ static int rate_get_samples (struct audio_pcm_info *info, SpiceRateCtl *rate) bytes = muldiv64 (ticks, info->bytes_per_second, get_ticks_per_sec ()); samples = (bytes - rate->bytes_sent) >> info->shift; if (samples < 0 || samples > 65536) { -fprintf (stderr, "Resetting rate control (%" PRId64 " samples)\n", samples); +error_report("Resetting rate control (%" PRId64 " samples)", samples); rate_start (rate); samples = 0; } diff --git a/audio/wavcapture.c b/audio/wavcapture.c index 9d94623..60f7a72 100644 --- a/audio/wavcapture.c +++ b/audio/wavcapture.c @@ -63,8 +63,8 @@ static void wav_destroy (void *opaque) } doclose: if (fclose (wav->f)) { -fprintf (stderr, "wav_destroy: fclose failed: %s", - strerror (errno)); +error_report("wav_destroy: fclose failed: %s", + strerror (errno)); } } -- 1.7.9.5
[Qemu-devel] [PATCH 1/4] arch_init: replace fprintf(stderr, ...) with error_report() in arch_init.c
Replace fprintf(stderr,...) with error_report() in the file arch_init.c. The trailing "\n"s of the @fmt argument have been removed because @fmt of error_report() should not contain newline. Signed-off-by: Le Tan --- arch_init.c | 36 +--- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/arch_init.c b/arch_init.c index 60c975d..0b41475 100644 --- a/arch_init.c +++ b/arch_init.c @@ -921,12 +921,12 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) xh_len = qemu_get_be16(f); if (xh_flags != ENCODING_FLAG_XBZRLE) { -fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n"); +error_report("Failed to load XBZRLE page - wrong compression!"); return -1; } if (xh_len > TARGET_PAGE_SIZE) { -fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n"); +error_report("Failed to load XBZRLE page - len overflow!"); return -1; } /* load data and decode */ @@ -936,11 +936,11 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) ret = xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host, TARGET_PAGE_SIZE); if (ret == -1) { -fprintf(stderr, "Failed to load XBZRLE page - decode error!\n"); +error_report("Failed to load XBZRLE page - decode error!"); rc = -1; } else if (ret > TARGET_PAGE_SIZE) { -fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n", -ret, TARGET_PAGE_SIZE); +error_report("Failed to load XBZRLE page - size %d exceeds %d!", + ret, TARGET_PAGE_SIZE); abort(); } @@ -957,7 +957,7 @@ static inline void *host_from_stream_offset(QEMUFile *f, if (flags & RAM_SAVE_FLAG_CONTINUE) { if (!block) { -fprintf(stderr, "Ack, bad migration stream!\n"); +error_report("Ack, bad migration stream!"); return NULL; } @@ -973,7 +973,7 @@ static inline void *host_from_stream_offset(QEMUFile *f, return memory_region_get_ram_ptr(block->mr) + offset; } -fprintf(stderr, "Can't find block %s!\n", id); +error_report("Can't find block %s!", id); return NULL; } @@ -1026,10 +1026,10 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) QTAILQ_FOREACH(block, &ram_list.blocks, next) { if (!strncmp(id, block->idstr, sizeof(id))) { if (block->length != length) { -fprintf(stderr, -"Length mismatch: %s: " RAM_ADDR_FMT -" in != " RAM_ADDR_FMT "\n", id, length, -block->length); +error_report( + "Length mismatch: %s: " RAM_ADDR_FMT + " in != " RAM_ADDR_FMT, id, length, + block->length); ret = -EINVAL; goto done; } @@ -1038,8 +1038,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) } if (!block) { -fprintf(stderr, "Unknown ramblock \"%s\", cannot " -"accept migration\n", id); +error_report("Unknown ramblock \"%s\", cannot " + "accept migration", id); ret = -EINVAL; goto done; } @@ -1186,12 +1186,10 @@ void select_soundhw(const char *optarg) if (!c->name) { if (l > 80) { -fprintf(stderr, -"Unknown sound card name (too big to show)\n"); +error_report("Unknown sound card name (too big to show)"); } else { -fprintf(stderr, "Unknown sound card name `%.*s'\n", -(int) l, p); +error_report("Unknown sound card name `%.*s'", (int) l, p); } bad_card = 1; } @@ -1214,13 +1212,13 @@ void audio_init(void) if (c->enabled) { if (c->isa) { if (!isa_bus) { -fprintf(stderr, "ISA bus not available for %s\n", c->name); +err
[Qemu-devel] [PATCH 4/4] bsd-user: replace fprintf(stderr, ...) with error_report()
Replace fprintf(stderr,...) with error_report() in files bsd-user/*. The trailing "\n"s of the @fmt argument have been removed because @fmt of error_report() should not contain newline. Signed-off-by: Le Tan --- bsd-user/bsdload.c |2 +- bsd-user/elfload.c |2 +- bsd-user/main.c| 14 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/bsd-user/bsdload.c b/bsd-user/bsdload.c index 2abc713..6b52e08 100644 --- a/bsd-user/bsdload.c +++ b/bsd-user/bsdload.c @@ -183,7 +183,7 @@ int loader_exec(const char * filename, char ** argv, char ** envp, && bprm.buf[3] == 'F') { retval = load_elf_binary(&bprm,regs,infop); } else { -fprintf(stderr, "Unknown binary format\n"); +error_report("Unknown binary format"); return -1; } } diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c index 93fd9e4..95652b1 100644 --- a/bsd-user/elfload.c +++ b/bsd-user/elfload.c @@ -628,7 +628,7 @@ static abi_ulong copy_elf_strings(int argc,char ** argv, void **page, while (argc-- > 0) { tmp = argv[argc]; if (!tmp) { -fprintf(stderr, "VFS: argc is wrong"); +error_report("VFS: argc is wrong"); exit(-1); } tmp1 = tmp; diff --git a/bsd-user/main.c b/bsd-user/main.c index f81ba55..3825e76 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -378,8 +378,8 @@ void cpu_loop(CPUX86State *env) #endif default: pc = env->segs[R_CS].base + env->eip; -fprintf(stderr, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n", -(long)pc, trapnr); +error_report("qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting", + (long)pc, trapnr); abort(); } process_pending_signals(env); @@ -752,7 +752,7 @@ int main(int argc, char **argv) module_call_init(MODULE_INIT_QOM); if ((envlist = envlist_create()) == NULL) { -(void) fprintf(stderr, "Unable to allocate envlist\n"); +(void) error_report("Unable to allocate envlist"); exit(1); } @@ -794,7 +794,7 @@ int main(int argc, char **argv) } else if (!strcmp(r, "ignore-environment")) { envlist_free(envlist); if ((envlist = envlist_create()) == NULL) { -(void) fprintf(stderr, "Unable to allocate envlist\n"); +(void) error_report("Unable to allocate envlist"); exit(1); } } else if (!strcmp(r, "U")) { @@ -816,7 +816,7 @@ int main(int argc, char **argv) qemu_host_page_size = atoi(argv[optind++]); if (qemu_host_page_size == 0 || (qemu_host_page_size & (qemu_host_page_size - 1)) != 0) { -fprintf(stderr, "page size must be a power of two\n"); +error_report("page size must be a power of two"); exit(1); } } else if (!strcmp(r, "g")) { @@ -910,7 +910,7 @@ int main(int argc, char **argv) qemu_host_page_size */ env = cpu_init(cpu_model); if (!env) { -fprintf(stderr, "Unable to find CPU definition\n"); +error_report("Unable to find CPU definition"); exit(1); } cpu = ENV_GET_CPU(env); @@ -1014,7 +1014,7 @@ int main(int argc, char **argv) #ifndef TARGET_ABI32 /* enable 64 bit mode if possible */ if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) { -fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n"); +error_report("The selected x86 CPU does not support 64 bit mode"); exit(1); } env->cr[4] |= CR4_PAE_MASK; -- 1.7.9.5
[Qemu-devel] [PATCH 3/4] block: replace fprintf(stderr, ...) with error_report() in block/
Replace fprintf(stderr,...) with error_report() in files block/*, block.c, block-migration.c and blockdev.c. The trailing "\n"s of the @fmt argument have been removed because @fmt of error_report() should not contain newline. Signed-off-by: Le Tan --- block-migration.c |4 +- block.c|4 +- block/linux-aio.c |4 +- block/nbd-client.h |2 +- block/qcow2-cluster.c |4 +- block/qcow2-refcount.c | 114 block/qcow2.c | 16 +++ block/quorum.c |4 +- block/raw-posix.c |8 ++-- block/raw-win32.c |6 +-- block/ssh.c|4 +- block/vdi.c| 14 +++--- block/vmdk.c | 21 - block/vpc.c|4 +- block/vvfat.c | 108 ++--- blockdev.c |6 +-- 16 files changed, 160 insertions(+), 163 deletions(-) diff --git a/block-migration.c b/block-migration.c index 56951e0..5bcf7c8 100644 --- a/block-migration.c +++ b/block-migration.c @@ -790,7 +790,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) bs = bdrv_find(device_name); if (!bs) { -fprintf(stderr, "Error unknown block device %s\n", +error_report("Error unknown block device %s", device_name); return -EINVAL; } @@ -833,7 +833,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) (addr == 100) ? '\n' : '\r'); fflush(stdout); } else if (!(flags & BLK_MIG_FLAG_EOS)) { -fprintf(stderr, "Unknown block migration flags: %#x\n", flags); +error_report("Unknown block migration flags: %#x", flags); return -EINVAL; } ret = qemu_file_get_error(f); diff --git a/block.c b/block.c index b749d31..7dc4acb 100644 --- a/block.c +++ b/block.c @@ -2694,8 +2694,8 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset, * if it has been enabled. */ if (bs->io_limits_enabled) { -fprintf(stderr, "Disabling I/O throttling on '%s' due " -"to synchronous I/O.\n", bdrv_get_device_name(bs)); +error_report("Disabling I/O throttling on '%s' due " + "to synchronous I/O.", bdrv_get_device_name(bs)); bdrv_io_limits_disable(bs); } diff --git a/block/linux-aio.c b/block/linux-aio.c index 53434e2..b706a59 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -162,8 +162,8 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd, break; /* Currently Linux kernel does not support other operations */ default: -fprintf(stderr, "%s: invalid AIO request type 0x%x.\n", -__func__, type); +error_report("%s: invalid AIO request type 0x%x.", + __func__, type); goto out_free_aiocb; } io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e)); diff --git a/block/nbd-client.h b/block/nbd-client.h index f2a6337..74178f4 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -9,7 +9,7 @@ #if defined(DEBUG_NBD) #define logout(fmt, ...) \ -fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__) +error_report("nbd\t%-24s" fmt, __func__, ##__VA_ARGS__) #else #define logout(fmt, ...) ((void)0) #endif diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 76d2bcf..b1c8daf 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -67,8 +67,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, } #ifdef DEBUG_ALLOC2 -fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n", -s->l1_size, new_l1_size); +error_report("grow l1_table from %d to %" PRId64, + s->l1_size, new_l1_size); #endif new_l1_size2 = sizeof(uint64_t) * new_l1_size; diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index e79895d..e494474 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -219,9 +219,9 @@ static int alloc_refcount_block(BlockDriverState *bs, } #ifdef DEBUG_ALLOC2 -fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64 -" at %" PRIx64 "\n", -refcount_table_index, cluster_index << s->cluster_bits, new_block); +error_report("qcow2: Allocate refcount block %d for %" PRIx64 + " at %" PRIx64, + refcount_table_index, cluster_index << s->cluster_bits, new_block); #endif if (in_same_refcount_block(s, new_block, cluster_i
Re: [Qemu-devel] [PATCH 3/4] block: replace fprintf(stderr, ...) with error_report() in block/
2014-05-10 21:18 GMT+08:00 Peter Crosthwaite : > On Sat, May 10, 2014 at 9:55 AM, Le Tan wrote: >> Replace fprintf(stderr,...) with error_report() in files block/*, block.c, >> block-migration.c and blockdev.c. The trailing "\n"s of the @fmt argument >> have been removed because @fmt of error_report() should not contain newline. >> >> Signed-off-by: Le Tan >> --- >> block-migration.c |4 +- >> block.c|4 +- >> block/linux-aio.c |4 +- >> block/nbd-client.h |2 +- >> block/qcow2-cluster.c |4 +- >> block/qcow2-refcount.c | 114 >> >> block/qcow2.c | 16 +++ >> block/quorum.c |4 +- >> block/raw-posix.c |8 ++-- >> block/raw-win32.c |6 +-- >> block/ssh.c|4 +- >> block/vdi.c| 14 +++--- >> block/vmdk.c | 21 - >> block/vpc.c|4 +- >> block/vvfat.c | 108 ++--- >> blockdev.c |6 +-- >> 16 files changed, 160 insertions(+), 163 deletions(-) >> >> diff --git a/block-migration.c b/block-migration.c >> index 56951e0..5bcf7c8 100644 >> --- a/block-migration.c >> +++ b/block-migration.c >> @@ -790,7 +790,7 @@ static int block_load(QEMUFile *f, void *opaque, int >> version_id) >> >> bs = bdrv_find(device_name); >> if (!bs) { >> -fprintf(stderr, "Error unknown block device %s\n", >> +error_report("Error unknown block device %s", >> device_name); >> return -EINVAL; >> } >> @@ -833,7 +833,7 @@ static int block_load(QEMUFile *f, void *opaque, int >> version_id) >> (addr == 100) ? '\n' : '\r'); >> fflush(stdout); >> } else if (!(flags & BLK_MIG_FLAG_EOS)) { >> -fprintf(stderr, "Unknown block migration flags: %#x\n", flags); >> +error_report("Unknown block migration flags: %#x", flags); >> return -EINVAL; >> } >> ret = qemu_file_get_error(f); >> diff --git a/block.c b/block.c >> index b749d31..7dc4acb 100644 >> --- a/block.c >> +++ b/block.c >> @@ -2694,8 +2694,8 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t >> offset, >> * if it has been enabled. >> */ >> if (bs->io_limits_enabled) { >> -fprintf(stderr, "Disabling I/O throttling on '%s' due " >> -"to synchronous I/O.\n", bdrv_get_device_name(bs)); >> +error_report("Disabling I/O throttling on '%s' due " >> + "to synchronous I/O.", bdrv_get_device_name(bs)); >> bdrv_io_limits_disable(bs); >> } >> >> diff --git a/block/linux-aio.c b/block/linux-aio.c >> index 53434e2..b706a59 100644 >> --- a/block/linux-aio.c >> +++ b/block/linux-aio.c >> @@ -162,8 +162,8 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void >> *aio_ctx, int fd, >> break; >> /* Currently Linux kernel does not support other operations */ >> default: >> -fprintf(stderr, "%s: invalid AIO request type 0x%x.\n", >> -__func__, type); >> +error_report("%s: invalid AIO request type 0x%x.", >> + __func__, type); >> goto out_free_aiocb; >> } >> io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e)); >> diff --git a/block/nbd-client.h b/block/nbd-client.h >> index f2a6337..74178f4 100644 >> --- a/block/nbd-client.h >> +++ b/block/nbd-client.h >> @@ -9,7 +9,7 @@ >> >> #if defined(DEBUG_NBD) >> #define logout(fmt, ...) \ >> -fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__) >> +error_report("nbd\t%-24s" fmt, __func__, ##__VA_ARGS__) > > So i'm not sure we want to convert debug printfery to error_report. > There's very good value in converting the printfs with user > visibility, but ones like this seem intended for developers only as > throwaway-output. My thinking is that this is a lower level output > than error_report. For instance, as a developer you may do something > to break your error API yet you still want your debug printfery. >
[Qemu-devel] [PATCH 0/2] replace some fprintf(stderr, ...) with error_report()
This series of patches replaces some more occurrences of fprintf(stderr, ...) with error_report() and removes the trailing "\n". Those for debugging are not changed. Le Tan (2): arch_init: replace fprintf(stderr,...) with error_report() block: replace fprintf(stderr,...) with error_report() arch_init.c| 32 +++--- block-migration.c |5 +-- block.c|4 +- block/qcow2-refcount.c | 114 block/qcow2.c | 18 block/raw-posix.c | 10 ++--- block/raw-win32.c |6 +-- block/ssh.c|2 +- block/vdi.c| 15 --- block/vmdk.c | 15 +++ block/vpc.c|4 +- block/vvfat.c | 69 +++-- blockdev.c |6 +-- 13 files changed, 147 insertions(+), 153 deletions(-) -- 1.7.9.5
[Qemu-devel] [PATCH 2/2] block: replace fprintf(stderr, ...) with error_report()
Replace fprintf(stderr,...) with error_report() in files block/*, block.c, block-migration.c and blockdev.c. The trailing "\n"s of the @fmt argument have been removed because @fmt of error_report() should not contain newline. Signed-off-by: Le Tan --- block-migration.c |5 +-- block.c|4 +- block/qcow2-refcount.c | 114 block/qcow2.c | 18 block/raw-posix.c | 10 ++--- block/raw-win32.c |6 +-- block/ssh.c|2 +- block/vdi.c| 15 --- block/vmdk.c | 15 +++ block/vpc.c|4 +- block/vvfat.c | 69 +++-- blockdev.c |6 +-- 12 files changed, 132 insertions(+), 136 deletions(-) diff --git a/block-migration.c b/block-migration.c index 56951e0..1660565 100644 --- a/block-migration.c +++ b/block-migration.c @@ -790,8 +790,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) bs = bdrv_find(device_name); if (!bs) { -fprintf(stderr, "Error unknown block device %s\n", -device_name); +error_report("Error unknown block device %s", device_name); return -EINVAL; } @@ -833,7 +832,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) (addr == 100) ? '\n' : '\r'); fflush(stdout); } else if (!(flags & BLK_MIG_FLAG_EOS)) { -fprintf(stderr, "Unknown block migration flags: %#x\n", flags); +error_report("Unknown block migration flags: %#x", flags); return -EINVAL; } ret = qemu_file_get_error(f); diff --git a/block.c b/block.c index c90c71a..14f581b 100644 --- a/block.c +++ b/block.c @@ -2698,8 +2698,8 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset, * if it has been enabled. */ if (bs->io_limits_enabled) { -fprintf(stderr, "Disabling I/O throttling on '%s' due " -"to synchronous I/O.\n", bdrv_get_device_name(bs)); +error_report("Disabling I/O throttling on '%s' due " + "to synchronous I/O.", bdrv_get_device_name(bs)); bdrv_io_limits_disable(bs); } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 9507aef..23d44b0 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -795,7 +795,7 @@ void qcow2_free_clusters(BlockDriverState *bs, BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE); ret = update_refcount(bs, offset, size, -1, type); if (ret < 0) { -fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret)); +error_report("qcow2_free_clusters failed: %s", strerror(-ret)); /* TODO Remember the clusters to free them later and avoid leaking */ } } @@ -1040,14 +1040,14 @@ static void inc_refcounts(BlockDriverState *bs, cluster_offset += s->cluster_size) { k = cluster_offset >> s->cluster_bits; if (k >= refcount_table_size) { -fprintf(stderr, "Warning: cluster offset=0x%" PRIx64 " is after " -"the end of the image file, can't properly check refcounts.\n", -cluster_offset); +error_report("Warning: cluster offset=0x%" PRIx64 " is after " + "the end of the image file, can't properly check refcounts.", + cluster_offset); res->check_errors++; } else { if (++refcount_table[k] == 0) { -fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64 -"\n", cluster_offset); +error_report("ERROR: overflow cluster offset=0x%" PRIx64, + cluster_offset); res->corruptions++; } } @@ -1091,9 +1091,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, case QCOW2_CLUSTER_COMPRESSED: /* Compressed clusters don't have QCOW_OFLAG_COPIED */ if (l2_entry & QCOW_OFLAG_COPIED) { -fprintf(stderr, "ERROR: cluster %" PRId64 ": " -"copied flag must never be set for compressed " -"clusters\n", l2_entry >> s->cluster_bits); +error_report("ERROR: cluster %" PRId64 ": " + "copied flag must never be set for compressed " + "clusters", l2_entry >> s->clus
[Qemu-devel] [PATCH 1/2] arch_init: replace fprintf(stderr, ...) with error_report()
Replace fprintf(stderr,...) with error_report() in the file arch_init.c. The trailing "\n"s of the @fmt argument have been removed because @fmt of error_report() should not contain newline. Signed-off-by: Le Tan --- arch_init.c | 32 +++- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/arch_init.c b/arch_init.c index 685ba0e..9f1a174 100644 --- a/arch_init.c +++ b/arch_init.c @@ -975,12 +975,12 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) xh_len = qemu_get_be16(f); if (xh_flags != ENCODING_FLAG_XBZRLE) { -fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n"); +error_report("Failed to load XBZRLE page - wrong compression!"); return -1; } if (xh_len > TARGET_PAGE_SIZE) { -fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n"); +error_report("Failed to load XBZRLE page - len overflow!"); return -1; } /* load data and decode */ @@ -989,7 +989,7 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) /* decode RLE */ if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host, TARGET_PAGE_SIZE) == -1) { -fprintf(stderr, "Failed to load XBZRLE page - decode error!\n"); +error_report("Failed to load XBZRLE page - decode error!"); return -1; } @@ -1006,7 +1006,7 @@ static inline void *host_from_stream_offset(QEMUFile *f, if (flags & RAM_SAVE_FLAG_CONTINUE) { if (!block) { -fprintf(stderr, "Ack, bad migration stream!\n"); +error_report("Ack, bad migration stream!"); return NULL; } @@ -1022,7 +1022,7 @@ static inline void *host_from_stream_offset(QEMUFile *f, return memory_region_get_ram_ptr(block->mr) + offset; } -fprintf(stderr, "Can't find block %s!\n", id); +error_report("Can't find block %s!", id); return NULL; } @@ -1075,10 +1075,9 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) QTAILQ_FOREACH(block, &ram_list.blocks, next) { if (!strncmp(id, block->idstr, sizeof(id))) { if (block->length != length) { -fprintf(stderr, -"Length mismatch: %s: " RAM_ADDR_FMT -" in != " RAM_ADDR_FMT "\n", id, length, -block->length); +error_report("Length mismatch: %s: " RAM_ADDR_FMT + " in != " RAM_ADDR_FMT, id, length, + block->length); ret = -EINVAL; goto done; } @@ -1087,8 +1086,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) } if (!block) { -fprintf(stderr, "Unknown ramblock \"%s\", cannot " -"accept migration\n", id); +error_report("Unknown ramblock \"%s\", cannot " + "accept migration", id); ret = -EINVAL; goto done; } @@ -1243,12 +1242,11 @@ void select_soundhw(const char *optarg) if (!c->name) { if (l > 80) { -fprintf(stderr, -"Unknown sound card name (too big to show)\n"); +error_report("Unknown sound card name (too big to show)"); } else { -fprintf(stderr, "Unknown sound card name `%.*s'\n", -(int) l, p); +error_report("Unknown sound card name `%.*s'", + (int) l, p); } bad_card = 1; } @@ -1271,13 +1269,13 @@ void audio_init(void) if (c->enabled) { if (c->isa) { if (!isa_bus) { -fprintf(stderr, "ISA bus not available for %s\n", c->name); +error_report("ISA bus not available for %s", c->name); exit(1); } c->init.init_isa(isa_bus); } else { if (!pci_bus) { -fprintf(stderr, "PCI bus not available for %s\n", c->name); +error_report("PCI bus not available for %s", c->name); exit(1); } c->init.init_pci(pci_bus); -- 1.7.9.5
[Qemu-devel] GSoC student self-introduction
Hi, I am a student participating in GSoC 2014. My brief introduction is given below. I am looking forward to studying QEMU and finishing the GSoC project with the community. Thank you very much! Name: Le Tan IRC nick: #tamlok Public git repo URL: https://github.com/tamlok/qemu.git Project: Intel IOMMU (VT-d) Emulation Summary: I will work on adding supports for emulating Intel IOMMUs according to Intel VT-d specification, based on an existing VT-d framework. This project requires me to implement the VT-d emulation and add the minimally required VT-d features for q35 chipset to the IOMMU framework in QEMU. I will concentrate on finishing this project with the help of my mentor and the community this summer. Regards, Le Tan
Re: [Qemu-devel] [PATCH 1/4] arch_init: replace fprintf(stderr, ...) with error_report() in arch_init.c
There is a newer version of patch to fix this problem. I forgot to add the version number when I sent the new patch. Add this comment just to indicate that this patch should be dropped. :) 2014-05-10 7:55 GMT+08:00 Le Tan : > Replace fprintf(stderr,...) with error_report() in the file > arch_init.c. The trailing "\n"s of the @fmt argument have been removed > because @fmt of error_report() should not contain newline. > > Signed-off-by: Le Tan > --- > arch_init.c | 36 +--- > 1 file changed, 17 insertions(+), 19 deletions(-) > > diff --git a/arch_init.c b/arch_init.c > index 60c975d..0b41475 100644 > --- a/arch_init.c > +++ b/arch_init.c > @@ -921,12 +921,12 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, > void *host) > xh_len = qemu_get_be16(f); > > if (xh_flags != ENCODING_FLAG_XBZRLE) { > -fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n"); > +error_report("Failed to load XBZRLE page - wrong compression!"); > return -1; > } > > if (xh_len > TARGET_PAGE_SIZE) { > -fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n"); > +error_report("Failed to load XBZRLE page - len overflow!"); > return -1; > } > /* load data and decode */ > @@ -936,11 +936,11 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, > void *host) > ret = xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host, > TARGET_PAGE_SIZE); > if (ret == -1) { > -fprintf(stderr, "Failed to load XBZRLE page - decode error!\n"); > +error_report("Failed to load XBZRLE page - decode error!"); > rc = -1; > } else if (ret > TARGET_PAGE_SIZE) { > -fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n", > -ret, TARGET_PAGE_SIZE); > +error_report("Failed to load XBZRLE page - size %d exceeds %d!", > + ret, TARGET_PAGE_SIZE); > abort(); > } > > @@ -957,7 +957,7 @@ static inline void *host_from_stream_offset(QEMUFile *f, > > if (flags & RAM_SAVE_FLAG_CONTINUE) { > if (!block) { > -fprintf(stderr, "Ack, bad migration stream!\n"); > +error_report("Ack, bad migration stream!"); > return NULL; > } > > @@ -973,7 +973,7 @@ static inline void *host_from_stream_offset(QEMUFile *f, > return memory_region_get_ram_ptr(block->mr) + offset; > } > > -fprintf(stderr, "Can't find block %s!\n", id); > +error_report("Can't find block %s!", id); > return NULL; > } > > @@ -1026,10 +1026,10 @@ static int ram_load(QEMUFile *f, void *opaque, int > version_id) > QTAILQ_FOREACH(block, &ram_list.blocks, next) { > if (!strncmp(id, block->idstr, sizeof(id))) { > if (block->length != length) { > -fprintf(stderr, > -"Length mismatch: %s: " RAM_ADDR_FMT > -" in != " RAM_ADDR_FMT "\n", id, > length, > -block->length); > +error_report( > + "Length mismatch: %s: " > RAM_ADDR_FMT > + " in != " RAM_ADDR_FMT, id, > length, > + block->length); > ret = -EINVAL; > goto done; > } > @@ -1038,8 +1038,8 @@ static int ram_load(QEMUFile *f, void *opaque, int > version_id) > } > > if (!block) { > -fprintf(stderr, "Unknown ramblock \"%s\", cannot " > -"accept migration\n", id); > +error_report("Unknown ramblock \"%s\", cannot " > + "accept migration", id); > ret = -EINVAL; > goto done; > } > @@ -1186,12 +1186,10 @@ void select_soundhw(const char *optarg) > > if (!c->name) { > if (l > 80) { > -fprintf(stderr, > -"Unknown sound card name (too big to
Re: [Qemu-devel] [PATCH 3/4] block: replace fprintf(stderr, ...) with error_report() in block/
There is a newer version of patch to fix this problem. I forgot to add the version number when I sent the new patch. Add this comment just to indicate that this patch should be dropped. :) 2014-05-10 21:18 GMT+08:00 Peter Crosthwaite : > On Sat, May 10, 2014 at 9:55 AM, Le Tan wrote: >> Replace fprintf(stderr,...) with error_report() in files block/*, block.c, >> block-migration.c and blockdev.c. The trailing "\n"s of the @fmt argument >> have been removed because @fmt of error_report() should not contain newline. >> >> Signed-off-by: Le Tan >> --- >> block-migration.c |4 +- >> block.c|4 +- >> block/linux-aio.c |4 +- >> block/nbd-client.h |2 +- >> block/qcow2-cluster.c |4 +- >> block/qcow2-refcount.c | 114 >> >> block/qcow2.c | 16 +++ >> block/quorum.c |4 +- >> block/raw-posix.c |8 ++-- >> block/raw-win32.c |6 +-- >> block/ssh.c|4 +- >> block/vdi.c| 14 +++--- >> block/vmdk.c | 21 - >> block/vpc.c|4 +- >> block/vvfat.c | 108 ++--- >> blockdev.c |6 +-- >> 16 files changed, 160 insertions(+), 163 deletions(-) >> >> diff --git a/block-migration.c b/block-migration.c >> index 56951e0..5bcf7c8 100644 >> --- a/block-migration.c >> +++ b/block-migration.c >> @@ -790,7 +790,7 @@ static int block_load(QEMUFile *f, void *opaque, int >> version_id) >> >> bs = bdrv_find(device_name); >> if (!bs) { >> -fprintf(stderr, "Error unknown block device %s\n", >> +error_report("Error unknown block device %s", >> device_name); >> return -EINVAL; >> } >> @@ -833,7 +833,7 @@ static int block_load(QEMUFile *f, void *opaque, int >> version_id) >> (addr == 100) ? '\n' : '\r'); >> fflush(stdout); >> } else if (!(flags & BLK_MIG_FLAG_EOS)) { >> -fprintf(stderr, "Unknown block migration flags: %#x\n", flags); >> +error_report("Unknown block migration flags: %#x", flags); >> return -EINVAL; >> } >> ret = qemu_file_get_error(f); >> diff --git a/block.c b/block.c >> index b749d31..7dc4acb 100644 >> --- a/block.c >> +++ b/block.c >> @@ -2694,8 +2694,8 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t >> offset, >> * if it has been enabled. >> */ >> if (bs->io_limits_enabled) { >> -fprintf(stderr, "Disabling I/O throttling on '%s' due " >> -"to synchronous I/O.\n", bdrv_get_device_name(bs)); >> +error_report("Disabling I/O throttling on '%s' due " >> + "to synchronous I/O.", bdrv_get_device_name(bs)); >> bdrv_io_limits_disable(bs); >> } >> >> diff --git a/block/linux-aio.c b/block/linux-aio.c >> index 53434e2..b706a59 100644 >> --- a/block/linux-aio.c >> +++ b/block/linux-aio.c >> @@ -162,8 +162,8 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void >> *aio_ctx, int fd, >> break; >> /* Currently Linux kernel does not support other operations */ >> default: >> -fprintf(stderr, "%s: invalid AIO request type 0x%x.\n", >> -__func__, type); >> +error_report("%s: invalid AIO request type 0x%x.", >> + __func__, type); >> goto out_free_aiocb; >> } >> io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e)); >> diff --git a/block/nbd-client.h b/block/nbd-client.h >> index f2a6337..74178f4 100644 >> --- a/block/nbd-client.h >> +++ b/block/nbd-client.h >> @@ -9,7 +9,7 @@ >> >> #if defined(DEBUG_NBD) >> #define logout(fmt, ...) \ >> -fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__) >> +error_report("nbd\t%-24s" fmt, __func__, ##__VA_ARGS__) > > So i'm not sure we want to convert debug printfery to error_report. > There's very good value in converting the printfs with user > visibility, but ones like this seem intended for developers only as > throwaway-output. My thinking is that this is a lower level output > than error_report. For instance, as a developer you may do something > to break your error API yet you still want your debug printfery. > Wouldn't matter in this location, but there may be other parts of the > tree where we don't want error_report relinace for debug > instrumentation and it just seems better to keep it consistent. > > Thinking further afield, qemu_log may ultimately be the correct > mechanism for this instead (I think thats what I have been using for > new code recently anyway). > > Thoughts from others? > > Regards, > Peter
Re: [Qemu-devel] [Qemu-trivial] [PATCH 2/2] block: replace fprintf(stderr, ...) with error_report()
I will fix the whitespace issues and resend this series with a new version later. Thanks very much! ;) Le 2014-05-24 4:27 GMT+08:00 Michael Tokarev : > 21.05.2014 04:10, Le Tan wrote: >> Replace fprintf(stderr,...) with error_report() in files block/*, block.c, >> block-migration.c and blockdev.c. The trailing "\n"s of the @fmt argument >> have been removed because @fmt of error_report() should not contain newline. >> >> Signed-off-by: Le Tan >> --- >> block-migration.c |5 +-- >> block.c|4 +- >> block/qcow2-refcount.c | 114 >> >> block/qcow2.c | 18 >> block/raw-posix.c | 10 ++--- >> block/raw-win32.c |6 +-- >> block/ssh.c|2 +- >> block/vdi.c| 15 --- >> block/vmdk.c | 15 +++ >> block/vpc.c|4 +- >> block/vvfat.c | 69 +++-- >> blockdev.c |6 +-- >> 12 files changed, 132 insertions(+), 136 deletions(-) > > That's a rather large -trivial patch... ;) > > Please check this patch using ./tools/checkpatch.pl - it will show > many whitespace issues. > > I'd apply it to -trivial (after fixing the whitespace issues), all > the actual changes look sane. But Cc'ing kwolf and stefanha too > (who are the block subsystem maintainers), because the patch touches > too many of block code. > > Thanks, > > /mjt >
Re: [Qemu-devel] [Qemu-trivial] [PATCH v3] block: replace fprintf(stderr, ...) with error_report()
Oh, yes, I may misunderstood it. I just ran ./scripts/checkpatch.pl to check files that I touched and fixed those I can fix. So I will send the v4 of this patch which just fix the whitespace issues based on v2. Is that right? Forgive my being naive. :) Thanks very much for all! Le 2014-05-25 22:36 GMT+08:00 Michael Tokarev : > 25.05.2014 18:29, Jan Kiszka wrote: >> On 2014-05-25 10:44, Le Tan wrote: >>> Replace fprintf(stderr,...) with error_report() in files block/*, block.c, >>> block-migration.c and blockdev.c. The trailing "\n"s of the @fmt argument >>> have been removed because @fmt of error_report() should not contain newline. >>> Also fix some coding style issues. >> >> Let's do the "also" part in a separate patch. It's more than one or two >> trivial pass-by style fixes, and some people may like the fprintf >> conversion while having different views on the other changes. > > I think he misunderstood. > > I told him that his previous patch had its own whitespace issues, > ie, it _adds_ whitespace errors. > > In another reply I told him it is usually a good idea to fix style > issues in the code the patch touches -- this means, close to, if a > line being changed for something else has an unrelated style issue, > it should be fixed too. (An example was space between function name > and its arguments in fprintf argument list, while converting that > fprintf to error_report). > > But it looks like he took this advise in much more broad way and > fixed _all_ whitespace/style issues in the files he touches. Oh > well.. :) > > And yes, definitely, please don't mix it like this. Usually, these > code style issues should not be touched by its own, only if you > change nearby code. > > Thanks, > > /mjt
[Qemu-devel] [PATCH v4] block: replace fprintf(stderr, ...) with error_report()
Replace fprintf(stderr,...) with error_report() in files block/*, block.c, block-migration.c and blockdev.c. The trailing "\n"s of the @fmt argument have been removed because @fmt of error_report() should not contain newline. Signed-off-by: Le Tan --- block-migration.c |6 +-- block.c|4 +- block/qcow2-refcount.c | 115 +- block/qcow2.c | 16 +++--- block/raw-posix.c |8 ++- block/raw-win32.c |6 +-- block/ssh.c|2 +- block/vdi.c| 14 +++--- block/vmdk.c | 15 +++--- block/vpc.c|4 +- block/vvfat.c | 129 blockdev.c |6 +-- 12 files changed, 159 insertions(+), 166 deletions(-) diff --git a/block-migration.c b/block-migration.c index 56951e0..9cab084 100644 --- a/block-migration.c +++ b/block-migration.c @@ -790,8 +790,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) bs = bdrv_find(device_name); if (!bs) { -fprintf(stderr, "Error unknown block device %s\n", -device_name); +error_report("Error unknown block device %s", + device_name); return -EINVAL; } @@ -833,7 +833,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) (addr == 100) ? '\n' : '\r'); fflush(stdout); } else if (!(flags & BLK_MIG_FLAG_EOS)) { -fprintf(stderr, "Unknown block migration flags: %#x\n", flags); +error_report("Unknown block migration flags: %#x", flags); return -EINVAL; } ret = qemu_file_get_error(f); diff --git a/block.c b/block.c index c90c71a..725c46a 100644 --- a/block.c +++ b/block.c @@ -2698,8 +2698,8 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset, * if it has been enabled. */ if (bs->io_limits_enabled) { -fprintf(stderr, "Disabling I/O throttling on '%s' due " -"to synchronous I/O.\n", bdrv_get_device_name(bs)); +error_report("Disabling I/O throttling on '%s' due " + "to synchronous I/O.\n", bdrv_get_device_name(bs)); bdrv_io_limits_disable(bs); } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 9507aef..9f6d1e6 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -795,7 +795,7 @@ void qcow2_free_clusters(BlockDriverState *bs, BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE); ret = update_refcount(bs, offset, size, -1, type); if (ret < 0) { -fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret)); +error_report("qcow2_free_clusters failed: %s", strerror(-ret)); /* TODO Remember the clusters to free them later and avoid leaking */ } } @@ -1040,14 +1040,15 @@ static void inc_refcounts(BlockDriverState *bs, cluster_offset += s->cluster_size) { k = cluster_offset >> s->cluster_bits; if (k >= refcount_table_size) { -fprintf(stderr, "Warning: cluster offset=0x%" PRIx64 " is after " -"the end of the image file, can't properly check refcounts.\n", -cluster_offset); +error_report("Warning: cluster offset=0x%" PRIx64 " is after the " + "end of the image file, " + "can't properly check refcounts.", + cluster_offset); res->check_errors++; } else { if (++refcount_table[k] == 0) { -fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64 -"\n", cluster_offset); +error_report("ERROR: overflow cluster offset=0x%" PRIx64, + cluster_offset); res->corruptions++; } } @@ -1091,9 +1092,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, case QCOW2_CLUSTER_COMPRESSED: /* Compressed clusters don't have QCOW_OFLAG_COPIED */ if (l2_entry & QCOW_OFLAG_COPIED) { -fprintf(stderr, "ERROR: cluster %" PRId64 ": " -"copied flag must never be set for compressed " -"clusters\n", l2_entry >> s->cluster_bits); +error_report("ERROR: cluster %" PRId64 ": " + "copied flag must never be set for compresse
[Qemu-devel] About AddressSpace in intel-iommu emulation
Hi Paolo, I am adding intel-iommu emulation to q35 for the GSoC project. I am confused about AddressSpace and I believe that you can help me. :) 1. For intel-iommu emulation, I have to read the translation structures from guest memory, that is, the guest will prepare some tables in memory and write the physical address of them to a register of intel-iommu, and I need to access those structures. I use dma_memory_read(&address_space_memory,...) to do this. Is that right? I am not sure that whether accesses to address_space_memory will be translated through IOMMU. I think the answer is not, because I see that cpu_physical_memory_read() also use address_space_memory as AddressSpace. 2. In my opinion, I have to init a AddressSpace and link it with my IOMMU MemoryRegion, then the bus uses this AddressSpace to translate the accesses. Is that right? For q35, how can I register my IOMMU MemoryRegion to the bus? I see that there is function pci_setup_iommu() that links a AddressSpace to the bus to translate accesses to PCI into system memory. Is that related? I think q35 should maintain a bus AddressSpace, but I can't find it. What do you think? Thanks very much! Regards, Le Tan
Re: [Qemu-devel] About AddressSpace in intel-iommu emulation
2014-06-26 22:05 GMT+08:00 Paolo Bonzini : > Il 26/06/2014 16:01, Le Tan ha scritto: > >> Hi Paolo, >> I am adding intel-iommu emulation to q35 for the GSoC project. I am >> confused about AddressSpace and I believe that you can help me. :) >> 1. For intel-iommu emulation, I have to read the translation >> structures from guest memory, that is, the guest will prepare some >> tables in memory and write the physical address of them to a register >> of intel-iommu, and I need to access those structures. I use >> dma_memory_read(&address_space_memory,...) to do this. Is that right? >> I am not sure that whether accesses to address_space_memory will be >> translated through IOMMU. I think the answer is not, because I see >> that cpu_physical_memory_read() also use address_space_memory as >> AddressSpace. > > > Correct. > > >> 2. In my opinion, I have to init a AddressSpace and link it with my >> IOMMU MemoryRegion, then the bus uses this AddressSpace to translate >> the accesses. Is that right? For q35, how can I register my IOMMU >> MemoryRegion to the bus? I see that there is function >> pci_setup_iommu() that links a AddressSpace to the bus to translate >> accesses to PCI into system memory. Is that related? I think q35 >> should maintain a bus AddressSpace, but I can't find it. >> What do you think? > > > Right now, the q35 PCI host does not define an iommu_fn, so the default DMA > address space is used by pci_device_iommu_address_space. This is just > address_space_memory. > > The iommu_fn is set with pci_setup_iommu. Commit ae74bbe (apb: implement > IOMMU translation for PCI host bridge, 2014-05-28) provides an example of > how to prepare an IOMMU memory region, add it to an address space, and > return that address space from an iommu_fn. > > Thanks, Thanks very much! I saw the example of apb and typoon before and was confused that why there is no such thing in q35. And it is clear now. Thanks very much! :) ps: Send this email again because I forgot to reply to all in last email. Sorry. :) Regards, Le > Paolo > > >> Thanks very much! >> >> Regards, >> Le Tan >> >
Re: [Qemu-devel] About AddressSpace in intel-iommu emulation
2014-06-26 22:05 GMT+08:00 Paolo Bonzini : > Il 26/06/2014 16:01, Le Tan ha scritto: > >> Hi Paolo, >> I am adding intel-iommu emulation to q35 for the GSoC project. I am >> confused about AddressSpace and I believe that you can help me. :) >> 1. For intel-iommu emulation, I have to read the translation >> structures from guest memory, that is, the guest will prepare some >> tables in memory and write the physical address of them to a register >> of intel-iommu, and I need to access those structures. I use >> dma_memory_read(&address_space_memory,...) to do this. Is that right? >> I am not sure that whether accesses to address_space_memory will be >> translated through IOMMU. I think the answer is not, because I see >> that cpu_physical_memory_read() also use address_space_memory as >> AddressSpace. > > > Correct. > > >> 2. In my opinion, I have to init a AddressSpace and link it with my >> IOMMU MemoryRegion, then the bus uses this AddressSpace to translate >> the accesses. Is that right? For q35, how can I register my IOMMU >> MemoryRegion to the bus? I see that there is function >> pci_setup_iommu() that links a AddressSpace to the bus to translate >> accesses to PCI into system memory. Is that related? I think q35 >> should maintain a bus AddressSpace, but I can't find it. >> What do you think? > > > Right now, the q35 PCI host does not define an iommu_fn, so the default DMA > address space is used by pci_device_iommu_address_space. This is just > address_space_memory. > > The iommu_fn is set with pci_setup_iommu. Commit ae74bbe (apb: implement > IOMMU translation for PCI host bridge, 2014-05-28) provides an example of > how to prepare an IOMMU memory region, add it to an address space, and > return that address space from an iommu_fn. Hi Paolo, I have added the address space to q35 and the translate function of intel_iommu is called. :) However, I still have some questions here. 1. In struct IOMMUTLBEntry, I think the addr_mask field should be the mask of the page offset, right? But I see different usages of this field. In spapr_tce_translate_iommu(), the addr_mask field is assigned with the mask of the page offset. However, in pbm_translate_iommu(), in the passthrough case, the addr_mask field seems to be assigned the mask of the page number. Is there any problem here? 2. For q35, how to identify origination of DMA requests? The VT-d manual says we should use source-id(for PCI-Express devices, it is requester identifier) to map devices to domains. What is the related part in QEMU? Where can I get the source-id of a DMA request? Thanks very much! Le > Thanks, > > Paolo > > >> Thanks very much! >> >> Regards, >> Le Tan >> >
Re: [Qemu-devel] About AddressSpace in intel-iommu emulation
2014-06-27 12:55 GMT+08:00 Paolo Bonzini : > Il 27/06/2014 04:08, Le Tan ha scritto: > >> 1. In struct IOMMUTLBEntry, I think the addr_mask field should be the >> mask of the page offset, right? But I see different usages of this >> field. In spapr_tce_translate_iommu(), the addr_mask field is assigned >> with the mask of the page offset. However, in pbm_translate_iommu(), >> in the passthrough case, the addr_mask field seems to be assigned the >> mask of the page number. Is there any problem here? > > > The intended usage is the one of spapr_tce_translate_iommu(). In practice > it doesn't matter, both work. > > >> 2. For q35, how to identify origination of DMA requests? The VT-d >> manual says we should use source-id(for PCI-Express devices, it is >> requester identifier) to map devices to domains. What is the related >> part in QEMU? Where can I get the source-id of a DMA request? > > > You need to create a different AddressSpace for each PCI bus or device. How to create a different AddressSpace for each device? I thought a AddressSpace just belongs to a PCI bus before. The paging structures for different functions of the same device can also be different, too. So maybe we should create a different AddressSpace for each function? How to achieve it? Could you give me some more hints or is there any existing example in QEMU? Thanks very much! Le > Paolo
Re: [Qemu-devel] About AddressSpace in intel-iommu emulation
2014-06-27 17:55 GMT+08:00 Jan Kiszka : > On 2014-06-27 07:46, Le Tan wrote: >> 2014-06-27 12:55 GMT+08:00 Paolo Bonzini : >>> Il 27/06/2014 04:08, Le Tan ha scritto: >>> >>>> 1. In struct IOMMUTLBEntry, I think the addr_mask field should be the >>>> mask of the page offset, right? But I see different usages of this >>>> field. In spapr_tce_translate_iommu(), the addr_mask field is assigned >>>> with the mask of the page offset. However, in pbm_translate_iommu(), >>>> in the passthrough case, the addr_mask field seems to be assigned the >>>> mask of the page number. Is there any problem here? >>> >>> >>> The intended usage is the one of spapr_tce_translate_iommu(). In practice >>> it doesn't matter, both work. >>> >>> >>>> 2. For q35, how to identify origination of DMA requests? The VT-d >>>> manual says we should use source-id(for PCI-Express devices, it is >>>> requester identifier) to map devices to domains. What is the related >>>> part in QEMU? Where can I get the source-id of a DMA request? >>> >>> >>> You need to create a different AddressSpace for each PCI bus or device. >> >> How to create a different AddressSpace for each device? I thought a >> AddressSpace just belongs to a PCI bus before. The paging structures >> for different functions of the same device can also be different, too. >> So maybe we should create a different AddressSpace for each function? >> How to achieve it? Could you give me some more hints or is there any >> existing example in QEMU? > > I would suggest to study the apb IOMMU implementation Paolo referenced > and the PCI layer functions used by that code. Specifically, > pci_setup_iommu takes a callback that is supposed to return an address > space to be used for a particular device. For apb, it's the same for all > devices on a bus, but that's not required... Yeah, I notice that the third parameter passed to bus->iommu_fn() is the key. That is the dev->devfn, which is the combination of #device and #function. I was so careless that I didn't noticed this field. I think I am getting close to the answer. I will dive into this later. :) Thanks very much! Regards, Le > > Jan > >
[Qemu-devel] Why devfn will be -1
Hi Jan, I use pci_setup_iommu() to setup a PCIIOMMUFunc for the q35 pci bus. In the iommu_fn, I print out the devfn parameter and find out that it sometimes will be -1. So what does it mean? The detail code is here: In mch_init() function, I write like this: PCIBus *pci_bus = PCI_BUS(qdev_get_parent_bus(DEVICE(mch))); pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu); And in q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn), I print out the devfn parameter, sometimes it will be -1. Thanks very much! Regards, Le
Re: [Qemu-devel] Why devfn will be -1
Hi Jan, 2014-07-01 15:34 GMT+08:00 Jan Kiszka : > Hi Le, > > On 2014-07-01 04:34, Le Tan wrote: >> Hi Jan, >> I use pci_setup_iommu() to setup a PCIIOMMUFunc for the q35 pci bus. >> In the iommu_fn, I print out the devfn parameter and find out that it >> sometimes will be -1. So what does it mean? >> The detail code is here: >> >> In mch_init() function, I write like this: >> PCIBus *pci_bus = PCI_BUS(qdev_get_parent_bus(DEVICE(mch))); >> pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu); >> >> And in q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn), I >> print out the devfn parameter, sometimes it will be -1. > > Hmm, I have no idea about the reason and would suggest to set a > conditional breakpoint on this function, then print the backtrace to see > where this comes from and analyze the device structure from where that > -1 was most probably taken. I have set a breakpoint here and cases whose devfn is -1 are different from each run. ICH9 LPC, cirrus-vga and e1000, these three devices' initialization will route to this problem. Sometimes the initialization of ICH9 LPC will be normal. The backtrace are given below. I trace to the function do_pci_register_device() and see that the dev->devfn is initialized after the pci_device_iommu_address_space() is called. So I think this is so strange. Is the devfn parameter to the q35_host_dma_iommu() not reliable and usable? Thanks very much! #0 q35_host_dma_iommu (bus=0x5629d280, opaque=0x566a4df0, devfn=-1) at /home/tamlok/qemu-git/hw/pci-host/q35.c:351 #1 0x558524ef in pci_device_iommu_address_space (dev=0x566a7e20) at /home/tamlok/qemu-git/hw/pci/pci.c:2284 #2 0x5584e5b4 in do_pci_register_device (pci_dev=0x566a7e20, bus=0x5629d280, name=0x5624a250 "ICH9 LPC", devfn=248) at /home/tamlok/qemu-git/hw/pci/pci.c:830 #3 0x55850bac in pci_qdev_init (qdev=0x566a7e20) at /home/tamlok/qemu-git/hw/pci/pci.c:1759 #4 0x557c9359 in device_realize (dev=0x566a7e20, errp=0x7fffdd50) at /home/tamlok/qemu-git/hw/core/qdev.c:182 #5 0x557cb15c in device_set_realized (obj=0x566a7e20, value=true, errp=0x7fffded0) at /home/tamlok/qemu-git/hw/core/qdev.c:809 #6 0x558d3234 in property_set_bool (obj=0x566a7e20, v=0x5668bec0, opaque=0x5662b360, name=0x559c2a86 "realized", errp=0x7fffded0) at /home/tamlok/qemu-git/qom/object.c:1421 #7 0x558d1a68 in object_property_set (obj=0x566a7e20, v=0x5668bec0, name=0x559c2a86 "realized", errp=0x7fffded0) at /home/tamlok/qemu-git/qom/object.c:819 #8 0x558d3a26 in object_property_set_qobject (obj=0x566a7e20, value=0x566acfd0, name=0x559c2a86 "realized", errp=0x7fffded0) at /home/tamlok/qemu-git/qom/qom-qobject.c:24 #9 0x558d1d65 in object_property_set_bool (obj=0x566a7e20, value=true, name=0x559c2a86 "realized", errp=0x7fffded0) at /home/tamlok/qemu-git/qom/object.c:883 #10 0x557c9295 in qdev_init (dev=0x566a7e20) at /home/tamlok/qemu-git/hw/core/qdev.c:167 #11 0x557c98a3 in qdev_init_nofail (dev=0x566a7e20) at /home/tamlok/qemu-git/hw/core/qdev.c:290 #12 0x55850d6f in pci_create_simple_multifunction (bus=0x5629d280, devfn=248, multifunction=true, name=0x559a3783 "ICH9 LPC") at /home/tamlok/qemu-git/hw/pci/pci.c:1800 #13 0x556943ce in pc_q35_init (machine=0x56281c00) at /home/tamlok/qemu-git/hw/i386/pc_q35.c:176 #14 0x5573c72f in main (argc=19, argv=0x7fffe4d8, envp=0x7fffe578) at /home/tamlok/qemu-git/vl.c:4441 #0 q35_host_dma_iommu (bus=0x5629d280, opaque=0x566a4df0, devfn=-1) at /home/tamlok/qemu-git/hw/pci-host/q35.c:351 #1 0x558524ef in pci_device_iommu_address_space (dev=0x566ef530) at /home/tamlok/qemu-git/hw/pci/pci.c:2284 #2 0x5584e5b4 in do_pci_register_device (pci_dev=0x566ef530, bus=0x5629d280, name=0x562551a0 "cirrus-vga", devfn=8) at /home/tamlok/qemu-git/hw/pci/pci.c:830 #3 0x55850bac in pci_qdev_init (qdev=0x566ef530) at /home/tamlok/qemu-git/hw/pci/pci.c:1759 #4 0x557c9359 in device_realize (dev=0x566ef530, errp=0x7fffdca0) at /home/tamlok/qemu-git/hw/core/qdev.c:182 #5 0x557cb15c in device_set_realized (obj=0x566ef530, value=true, errp=0x7fffde20) at /home/tamlok/qemu-git/hw/core/qdev.c:809 #6 0x558d3234 in property_set_bool (obj=0x566ef530, v=0x5666e300, opaque=0x56856f90, name=0x559c2a86 "realized", errp=0x7fffde20) at /home/tamlok/qemu-git/qom/object.c:1421 #7 0x558d1a68 in object_property_set (obj=0x566ef530, v=0x5666e300, name=0x559c2a86 "realized", errp=0x7fffde2
Re: [Qemu-devel] Why devfn will be -1
2014-07-01 20:52 GMT+08:00 Le Tan : > Hi Jan, > > 2014-07-01 15:34 GMT+08:00 Jan Kiszka : >> Hi Le, >> >> On 2014-07-01 04:34, Le Tan wrote: >>> Hi Jan, >>> I use pci_setup_iommu() to setup a PCIIOMMUFunc for the q35 pci bus. >>> In the iommu_fn, I print out the devfn parameter and find out that it >>> sometimes will be -1. So what does it mean? >>> The detail code is here: >>> >>> In mch_init() function, I write like this: >>> PCIBus *pci_bus = PCI_BUS(qdev_get_parent_bus(DEVICE(mch))); >>> pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu); >>> >>> And in q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn), I >>> print out the devfn parameter, sometimes it will be -1. >> >> Hmm, I have no idea about the reason and would suggest to set a >> conditional breakpoint on this function, then print the backtrace to see >> where this comes from and analyze the device structure from where that >> -1 was most probably taken. I think maybe this is a bug? In the function do_pci_register_device(), maybe these two sentence should be reorder? dma_as = pci_device_iommu_address_space(pci_dev); pci_dev->devfn = devfn; > I have set a breakpoint here and cases whose devfn is -1 are different > from each run. ICH9 LPC, cirrus-vga and e1000, these three devices' > initialization will route to this problem. Sometimes the > initialization of ICH9 LPC will be normal. The backtrace are given > below. I trace to the function do_pci_register_device() and see that > the dev->devfn is initialized after the > pci_device_iommu_address_space() is called. So I think this is so > strange. Is the devfn parameter to the q35_host_dma_iommu() not > reliable and usable? > Thanks very much! > > #0 q35_host_dma_iommu (bus=0x5629d280, opaque=0x566a4df0, devfn=-1) > at /home/tamlok/qemu-git/hw/pci-host/q35.c:351 > #1 0x558524ef in pci_device_iommu_address_space > (dev=0x566a7e20) at /home/tamlok/qemu-git/hw/pci/pci.c:2284 > #2 0x5584e5b4 in do_pci_register_device > (pci_dev=0x566a7e20, bus=0x5629d280, > name=0x5624a250 "ICH9 LPC", devfn=248) at > /home/tamlok/qemu-git/hw/pci/pci.c:830 > #3 0x55850bac in pci_qdev_init (qdev=0x566a7e20) at > /home/tamlok/qemu-git/hw/pci/pci.c:1759 > #4 0x557c9359 in device_realize (dev=0x566a7e20, > errp=0x7fffdd50) > at /home/tamlok/qemu-git/hw/core/qdev.c:182 > #5 0x557cb15c in device_set_realized (obj=0x566a7e20, > value=true, errp=0x7fffded0) > at /home/tamlok/qemu-git/hw/core/qdev.c:809 > #6 0x558d3234 in property_set_bool (obj=0x566a7e20, > v=0x5668bec0, opaque=0x5662b360, > name=0x559c2a86 "realized", errp=0x7fffded0) at > /home/tamlok/qemu-git/qom/object.c:1421 > #7 0x558d1a68 in object_property_set (obj=0x566a7e20, > v=0x5668bec0, name=0x559c2a86 "realized", > errp=0x7fffded0) at /home/tamlok/qemu-git/qom/object.c:819 > #8 0x558d3a26 in object_property_set_qobject > (obj=0x566a7e20, value=0x566acfd0, > name=0x559c2a86 "realized", errp=0x7fffded0) at > /home/tamlok/qemu-git/qom/qom-qobject.c:24 > #9 0x558d1d65 in object_property_set_bool > (obj=0x566a7e20, value=true, name=0x559c2a86 "realized", > errp=0x7fffded0) at /home/tamlok/qemu-git/qom/object.c:883 > #10 0x557c9295 in qdev_init (dev=0x566a7e20) at > /home/tamlok/qemu-git/hw/core/qdev.c:167 > #11 0x557c98a3 in qdev_init_nofail (dev=0x566a7e20) at > /home/tamlok/qemu-git/hw/core/qdev.c:290 > #12 0x55850d6f in pci_create_simple_multifunction > (bus=0x5629d280, devfn=248, multifunction=true, > name=0x559a3783 "ICH9 LPC") at /home/tamlok/qemu-git/hw/pci/pci.c:1800 > #13 0x556943ce in pc_q35_init (machine=0x56281c00) at > /home/tamlok/qemu-git/hw/i386/pc_q35.c:176 > #14 0x5573c72f in main (argc=19, argv=0x7fffe4d8, > envp=0x7fffe578) at /home/tamlok/qemu-git/vl.c:4441 > > #0 q35_host_dma_iommu (bus=0x5629d280, opaque=0x566a4df0, devfn=-1) > at /home/tamlok/qemu-git/hw/pci-host/q35.c:351 > #1 0x558524ef in pci_device_iommu_address_space > (dev=0x566ef530) at /home/tamlok/qemu-git/hw/pci/pci.c:2284 > #2 0x5584e5b4 in do_pci_register_device > (pci_dev=0x566ef530, bus=0x5629d280, > name=0x562551a0 "cirrus-vga", devfn=8) at > /home/tamlok/qemu-git/hw/pci/pci.c:830 > #3 0x55850bac in pci_qdev_init (qdev=0x566ef530) at > /home/tamlok/qemu-git/hw/pci/pci.c:1759 >
Re: [Qemu-devel] Why devfn will be -1
2014-07-01 20:56 GMT+08:00 Jan Kiszka : > On 2014-07-01 14:55, Le Tan wrote: >> 2014-07-01 20:52 GMT+08:00 Le Tan : >>> Hi Jan, >>> >>> 2014-07-01 15:34 GMT+08:00 Jan Kiszka : >>>> Hi Le, >>>> >>>> On 2014-07-01 04:34, Le Tan wrote: >>>>> Hi Jan, >>>>> I use pci_setup_iommu() to setup a PCIIOMMUFunc for the q35 pci bus. >>>>> In the iommu_fn, I print out the devfn parameter and find out that it >>>>> sometimes will be -1. So what does it mean? >>>>> The detail code is here: >>>>> >>>>> In mch_init() function, I write like this: >>>>> PCIBus *pci_bus = PCI_BUS(qdev_get_parent_bus(DEVICE(mch))); >>>>> pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu); >>>>> >>>>> And in q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn), I >>>>> print out the devfn parameter, sometimes it will be -1. >>>> >>>> Hmm, I have no idea about the reason and would suggest to set a >>>> conditional breakpoint on this function, then print the backtrace to see >>>> where this comes from and analyze the device structure from where that >>>> -1 was most probably taken. >> >> I think maybe this is a bug? In the function do_pci_register_device(), >> maybe these two sentence should be reorder? >> dma_as = pci_device_iommu_address_space(pci_dev); >> pci_dev->devfn = devfn; > > Looks like. Give it a try, then possibly send a patch :) I reorder these two sentences and get the print log like this: vtd bus 0 slot 31 func 0 devfn 248 vtd bus 0 slot 31 func 2 devfn 250 vtd bus 0 slot 31 func 3 devfn 251 vtd bus 0 slot 1 func 0 devfn 8 vtd bus 0 slot 2 func 0 devfn 16 The "info pci" output is here: (qemu) info pci Bus 0, device 0, function 0: Host bridge: PCI device 8086:29c0 id "" Bus 0, device 1, function 0: VGA controller: PCI device 1013:00b8 BAR0: 32 bit prefetchable memory at 0xfc00 [0xfdff]. BAR1: 32 bit memory at 0xfebf [0xfebf0fff]. BAR6: 32 bit memory at 0x [0xfffe]. id "" Bus 0, device 2, function 0: Ethernet controller: PCI device 8086:100e IRQ 11. BAR0: 32 bit memory at 0xfebc [0xfebd]. BAR1: I/O at 0xc000 [0xc03f]. BAR6: 32 bit memory at 0x [0x0003fffe]. id "" Bus 0, device 31, function 0: ISA bridge: PCI device 8086:2918 id "" Bus 0, device 31, function 2: SATA controller: PCI device 8086:2922 IRQ 10. BAR4: I/O at 0xc080 [0xc09f]. BAR5: 32 bit memory at 0xfebf1000 [0xfebf1fff]. id "" Bus 0, device 31, function 3: SMBus: PCI device 8086:2930 IRQ 10. BAR4: I/O at 0x0700 [0x073f]. id "" So maybe it is all right now? And I will go on the vtd emulation.:) Thanks very much! > Jan >
[Qemu-devel] [PATCH] pci: assign devfn to pci_dev before calling pci_device_iommu_address_space()
In function do_pci_register_device() in file hw/pci/pci.c, move the assignment of pci_dev->devfn to the position before the call to pci_device_iommu_address_space(pci_dev) which will use the value of pci_dev->devfn. Signed-off-by: Le Tan --- hw/pci/pci.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 17ed510..351d320 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -827,6 +827,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, } pci_dev->bus = bus; +pci_dev->devfn = devfn; dma_as = pci_device_iommu_address_space(pci_dev); memory_region_init_alias(&pci_dev->bus_master_enable_region, @@ -836,7 +837,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region, name); -pci_dev->devfn = devfn; pstrcpy(pci_dev->name, sizeof(pci_dev->name), name); pci_dev->irq_state = 0; pci_config_alloc(pci_dev); -- 1.7.9.5
[Qemu-devel] [PATCH] ahci: map memory via device's address space instead of address_space_memory
In map_page() in hw/ide/ahci.c, replace cpu_physical_memory_map() and cpu_physical_memory_unmap() with dma_memory_map() and dma_memory_unmap(), because ahci devices should not access memory directly but via their address space. Add an AddressSpace parameter to map_page(). In order to call map_page(), we should pass the AHCIState.as as the AddressSpace argument. Signed-off-by: Le Tan --- hw/ide/ahci.c | 21 +++-- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 9bae22e..7bb0a03 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -175,17 +175,18 @@ static void ahci_trigger_irq(AHCIState *s, AHCIDevice *d, ahci_check_irq(s); } -static void map_page(uint8_t **ptr, uint64_t addr, uint32_t wanted) +static void map_page(AddressSpace *as, uint8_t **ptr, uint64_t addr, + uint32_t wanted) { hwaddr len = wanted; if (*ptr) { -cpu_physical_memory_unmap(*ptr, len, 1, len); +dma_memory_unmap(as, *ptr, len, DMA_DIRECTION_FROM_DEVICE, len); } -*ptr = cpu_physical_memory_map(addr, &len, 1); +*ptr = dma_memory_map(as, addr, &len, DMA_DIRECTION_FROM_DEVICE); if (len < wanted) { -cpu_physical_memory_unmap(*ptr, len, 1, len); +dma_memory_unmap(as, *ptr, len, DMA_DIRECTION_FROM_DEVICE, len); *ptr = NULL; } } @@ -198,24 +199,24 @@ static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val) switch (offset) { case PORT_LST_ADDR: pr->lst_addr = val; -map_page(&s->dev[port].lst, +map_page(s->as, &s->dev[port].lst, ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024); s->dev[port].cur_cmd = NULL; break; case PORT_LST_ADDR_HI: pr->lst_addr_hi = val; -map_page(&s->dev[port].lst, +map_page(s->as, &s->dev[port].lst, ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024); s->dev[port].cur_cmd = NULL; break; case PORT_FIS_ADDR: pr->fis_addr = val; -map_page(&s->dev[port].res_fis, +map_page(s->as, &s->dev[port].res_fis, ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256); break; case PORT_FIS_ADDR_HI: pr->fis_addr_hi = val; -map_page(&s->dev[port].res_fis, +map_page(s->as, &s->dev[port].res_fis, ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256); break; case PORT_IRQ_STAT: @@ -1260,9 +1261,9 @@ static int ahci_state_post_load(void *opaque, int version_id) ad = &s->dev[i]; AHCIPortRegs *pr = &ad->port_regs; -map_page(&ad->lst, +map_page(s->as, &ad->lst, ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024); -map_page(&ad->res_fis, +map_page(s->as, &ad->res_fis, ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256); /* * All pending i/o should be flushed out on a migrate. However, -- 1.7.9.5
[Qemu-devel] How to decide the value of Host Address Width in ACPI DMAR table
Hi, I am doing Intel IOMMU emulation for q35. I need to add a DMAR table to ACPI. There is one field in the DMAR table called "Host Address Width", which indicates the maximum DMA physical addressability supported by this platform. I am not sure what this value should be and how to get this value. What is the best way to decide the value of HAW? Any suggestion is appreciated. :) Thanks very much! Regards, Le Tan
[Qemu-devel] When QEMU is emulating DMA, can other vcpus read/write the same memory region?
Hi, I have been confused by this question for a long time, both in hardware and software. First, in the real x86 hardware architectures, while the DMA is operating, can other CPUs access the same memory region? Or the DMA controller will hold the bus and the memory exclusively and CPUs have to wait? Of course, we assume that the operating system is too stupid to provide the mutex in software. Second, if the CPUs have to wait, then how about in QEMU? When QEMU is emulating the DMA, can other vcpus access the memory (or exactly, the same memory region)? If not, how does QEMU achieve this? That is, how does QEMU prevent other vcpus to access the memory? I have read through the part of the emulation of DMA, but I didn't see anything related to this. Thanks very much! --Le Tan
Re: [Qemu-devel] [GSoC] Wanted: small warmup tasks
2014-04-24 19:38 GMT+08:00 Andreas Färber : > -BEGIN PGP SIGNED MESSAGE- > Hash: SHA1 > > Am 24.04.2014 08:19, schrieb Jan Kiszka: >> On 2014-04-23 11:25, Stefan Hajnoczi wrote: >>> Dear QEMU, Libvirt, and KVM communities, We are participating in >>> Google Summer of Code 2014 (http://google-melange.com/) and >>> Outreach Program for Women (http://opw.gnome.org/). Both >>> programs fund candidates to work on our open source projects for >>> 12 weeks this summer. >> >> To follow up on this: I'm currently looking for optional tiny >> "warmup" tasks for our QEMU students during the bonding period >> (till May 18). If you have any trivial issues or extensions in mind >> that someone could address within a few days or even hours, that >> would be perfect. It could even be something like "reformat the >> printing of these messages" or so. > > Replacing some more fprintf(stderr, "foo\n") with error_report("foo") > comes to mind. :) Hi, Having consulted my mentor, I want to pick up this as the warm-up task.:) I will dig into it in the following days and learn how to contribute in the community. Thanks! --Le Tan
[Qemu-devel] VNC viewer displays nothing with -enable-kvm but works fine without -enable-kvm
Hi, I used qemu-kvm-1.2.0 before. I can run qemu with kvm well. Two days ago, I updated qemu to qemu-1.6.2, and start the qemu with the command: qemu-system-x86_64 -m 2048 -smp 4 -hda /home/tanle/study/new.img -nographic -vnc 162.105.146.118:2 It works fine and slow. It is obviously not using the kvm to accelerate. Then I use the command: qemu-system-x86_64 -m 2048 -smp 4 -hda /home/tanle/study/new.img -nographic -vnc 162.105.146.118:2 -enable-kvm The VNC viewer can connect to the vm, but it is black and displays nothing. What is wrong here? My kernel version is 3.11.0. Will it be the problem of compatibility? Thanks! --Le Tan
[Qemu-devel] How to understand the coroutine context?
Hi, I am diving into the source code of qemu. I see the word "coroutine" appears in so many places. I can't figure out what it means. So, please, can anyone help me, telling me the mechanism or semantic of "coroutine"? Thanks! --Le Tan
Re: [Qemu-devel] How to understand the coroutine context?
Thanks, that is an excellent blog! 2014-03-18 12:04 GMT+08:00 Kashyap Chamarthy : > On Tue, Mar 18, 2014 at 07:56:16AM +0800, Le Tan wrote: >> Hi, I am diving into the source code of qemu. I see the word >> "coroutine" appears in so many places. I can't figure out what it >> means. So, please, can anyone help me, telling me the mechanism or >> semantic of "coroutine"? Thanks! > > This might be of help -- > http://blog.vmsplice.net/2014/01/coroutines-in-qemu-basics.html > > > -- > /kashyap
[Qemu-devel] About Disk I/O and DMA emulation in qemu-kvm
Hi, I am now diving into the disk I/O in kvm. But I find that things are a little messy about this and I am stuck here. I configure the virtual machine with a virtual IDE disk and DMA support. I need to get every disk I/O issued by the virtual machine. I know that there are struct IDEDevie, struct IDEBus, struct IDEDMA etc. in the qemu. But I just can't figure out the logical relationships among these things and the general code path of the disk I/O or DMA. So, how is IDE disk or DMA emulated in qemu-kvm? What is the gerneral path or logic when the vcpu issues a disk I/O? Can anyone give me the complete gerneral description or idea of the disk I/O in qemu-kvm? Or which part of the code should be taken in consideration? Maybe there are some useful materials? Maybe the question is a little complicated. Any help is appreciated! Thanks! --Le Tan
Re: [Qemu-devel] VNC viewer displays nothing with -enable-kvm but works fine without -enable-kvm
I have tried 1.7.0 and it doesn't work either. And I find that it doesn't work with QEMU whose version is above 1.3. Then I change the kernel from 3.11.0 to the latest release, and it works fine with QEMU 1.6.2 and 1.7.0!!! I know that qemu-kvm have been merged back into qemu after 1.2.2. So maybe there is something different between before and after this? Maybe it is the problem of compatibility between the old kernel and the new QEMU? Thanks. --Le Tan 2014-03-20 15:45 GMT+08:00 Stefan Hajnoczi : > On Fri, Mar 14, 2014 at 04:02:58PM +0800, Le Tan wrote: > > Hi, I used qemu-kvm-1.2.0 before. I can run qemu with kvm well. Two days > > ago, I updated qemu to qemu-1.6.2, and start the qemu with the command: > > qemu-system-x86_64 -m 2048 -smp 4 -hda /home/tanle/study/new.img > -nographic > > -vnc 162.105.146.118:2 > > It works fine and slow. It is obviously not using the kvm to accelerate. > > Then I use the command: > > qemu-system-x86_64 -m 2048 -smp 4 -hda /home/tanle/study/new.img > -nographic > > -vnc 162.105.146.118:2 -enable-kvm > > The VNC viewer can connect to the vm, but it is black and displays > nothing. > > What is wrong here? > > Please use the latest upstream QEMU release (1.7.0) or contact your > distro if you are using a packaged version of QEMU. > > Stefan >
[Qemu-devel] Disk I/O in QEMU
Hi, I am now studying the disk I/O in QEMU, but I know little about QEMU. I want to know how QEMU implements or emulates the disk I/O? Where to find some materials about this? And if I can get all the disk I/O request from the guest in QEMU? Thanks!
Re: [Qemu-devel] Disk I/O in QEMU
Hi Stefan, Thanks for your help. I find that the source code of qemu-1.2.0 doesn't have hw/block/. So I download the qemu-1.6.2. However it doesn't work. I run the configure like this: ./configure --enable-kvm --x86_64-softmmu then make and make install. I can start the vm using this command: qemu-system-x86_64 -m 1024 -smp 2 -hda my.img But the qemu is not using the kvm to accelerate. Then I add the "-enable-kvm" in the command. I connect to the vm via VNC. However, the vnc-viewer is just black and displays nothing. It used to work fine with qemu-1.2.0 (qemu-kvm). Of course, with qemu-kvm, there is no need to add the "-enable-kvm" option. So with qemu-1.6.2, did I miss any options? Thanks! 2014-03-11 20:23 GMT+08:00 Stefan Hajnoczi : > On Tue, Mar 11, 2014 at 03:08:52PM +0800, Le Tan wrote: > > Hi, I am now studying the disk I/O in QEMU, but I know little about > QEMU. I > > want to know how QEMU implements or emulates the disk I/O? Where to find > > some materials about this? And if I can get all the disk I/O request from > > the guest in QEMU? Thanks! > > QEMU emulates hardware including IDE or SCSI controllers. I/O requests > go through QEMU. Look at hw/block/. > > Stefan >
Re: [Qemu-devel] Disk I/O in QEMU
I am sorry I didn't give the complete command I have executed. It is like this: qemu-system-x86_64 -m 1024 -smp 2 -hda /home/tanle/study/new.img -nographic -vnc 192.168.146.118:2 -enable-kvm If I delete the "-enable-kvm", it is ok and the system can boot up (just slow). But with "-enable-kvm", the vnc viewer is just black, displaying nothing. So did I miss any options? Or there is something else wrong? Thanks! 2014-03-12 21:13 GMT+08:00 Fam Zheng : > On Wed, 03/12 20:27, Le Tan wrote: > > Hi Stefan, > > Thanks for your help. I find that the source code of qemu-1.2.0 doesn't > > have hw/block/. So I download the qemu-1.6.2. However it doesn't work. I > > run the configure like this: > > ./configure --enable-kvm --x86_64-softmmu > > then make and make install. > > I can start the vm using this command: > > qemu-system-x86_64 -m 1024 -smp 2 -hda my.img > > But the qemu is not using the kvm to accelerate. Then I add the > > "-enable-kvm" in the command. I connect to the vm via VNC. However, the > > vnc-viewer is just black and displays nothing. > > It used to work fine with qemu-1.2.0 (qemu-kvm). Of course, with > qemu-kvm, > > there is no need to add the "-enable-kvm" option. > > So with qemu-1.6.2, did I miss any options? > > Thanks! > > > > But you didn't specify vnc as display in your command line? > > Try specify "-vnc :0" and connect with "vncviewer :0". > > Fam >