Re: [Qemu-devel] [PATCH] intel_iommu: fix VTD_SID_TO_BUS

2014-10-20 Thread Le Tan
Hi Markus,

2014-10-20 19:41 GMT+08:00 Markus Armbruster :
> "Michael S. Tsirkin"  writes:
>
>> (((sid) >> 8) && 0xff)  makes no sense
>> (((sid) >> 8) & 0xff) seems to be what was meant.
>>
>> Suggested-by: Markus Armbruster 
>
> Actually by the reporter of https://bugs.launchpad.net/bugs/1382477
>
>> Signed-off-by: Michael S. Tsirkin 
>> ---
>>
>> Compile-tested only.
>>
>>  include/hw/i386/intel_iommu.h | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
>> index f4701e1..e321ee4 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -37,7 +37,7 @@
>>  #define VTD_PCI_DEVFN_MAX   256
>>  #define VTD_PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
>>  #define VTD_PCI_FUNC(devfn) ((devfn) & 0x07)
>> -#define VTD_SID_TO_BUS(sid) (((sid) >> 8) && 0xff)
>> +#define VTD_SID_TO_BUS(sid) (((sid) >> 8) & 0xff)
>>  #define VTD_SID_TO_DEVFN(sid)   ((sid) & 0xff)
>>
>>  #define DMAR_REG_SIZE   0x230
>
> Can't find the spec right now, but it looks plausible enough.

Yes, this is a typo. I am sorry that I introduced such a mistake.
The spec is here in Section 3.4 :
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/vt-directed-io-spec.html

VTD_SID_TO_BUS(sid) is intended to be used to get the bus id from the
source identifier.

Thanks very much!

Regards,
Le

> Only use is in vtd_context_device_invalidate().  Bug's impact isn't
> obvious to me.
>
> Reviewed-by: Markus Armbruster 



[Qemu-devel] [PATCH 2/3] intel-iommu: add DMAR table to ACPI tables

2014-07-22 Thread Le Tan
Expose Intel IOMMU to the BIOS. If object of TYPE_INTEL_IOMMU_DEVICE exists,
add DMAR table to ACPI RSDT table. For now the DMAR table indicates that there
is only one hardware unit without INTR_REMAP capability on the platform.

Signed-off-by: Le Tan 
---
 hw/i386/acpi-build.c | 41 ++
 hw/i386/acpi-defs.h  | 70 
 2 files changed, 111 insertions(+)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index ebc5f03..8241621 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -45,6 +45,7 @@
 #include "hw/i386/ich9.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/pci-host/q35.h"
+#include "hw/i386/intel_iommu.h"
 
 #include "hw/i386/q35-acpi-dsdt.hex"
 #include "hw/i386/acpi-dsdt.hex"
@@ -1316,6 +1317,31 @@ build_mcfg_q35(GArray *table_data, GArray *linker, 
AcpiMcfgInfo *info)
 }
 
 static void
+build_dmar_q35(GArray *table_data, GArray *linker)
+{
+int dmar_start = table_data->len;
+
+AcpiTableDmar *dmar;
+AcpiDmarHardwareUnit *drhd;
+
+dmar = acpi_data_push(table_data, sizeof(*dmar));
+dmar->host_address_width = 0x26;/* 0x26 + 1 = 39 */
+dmar->flags = 0;/* No intr_remap for now */
+
+/* DMAR Remapping Hardware Unit Definition structure */
+drhd = acpi_data_push(table_data, sizeof(*drhd));
+drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT);
+drhd->length = cpu_to_le16(sizeof(*drhd));   /* No device scope now */
+drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL;
+drhd->pci_segment = cpu_to_le16(0);
+drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR);
+
+build_header(linker, table_data, (void *)(table_data->data + dmar_start),
+ "DMAR", table_data->len - dmar_start, 1);
+}
+
+
+static void
 build_dsdt(GArray *table_data, GArray *linker, AcpiMiscInfo *misc)
 {
 AcpiTableHeader *dsdt;
@@ -1436,6 +1462,17 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg)
 return true;
 }
 
+static bool acpi_has_iommu(void)
+{
+bool ambiguous;
+Object *intel_iommu;
+
+intel_iommu = object_resolve_path_type("", TYPE_INTEL_IOMMU_DEVICE,
+   &ambiguous);
+return intel_iommu && !ambiguous;
+}
+
+
 static
 void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables)
 {
@@ -1497,6 +1534,10 @@ void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables 
*tables)
 acpi_add_table(table_offsets, tables->table_data);
 build_mcfg_q35(tables->table_data, tables->linker, &mcfg);
 }
+if (acpi_has_iommu()) {
+acpi_add_table(table_offsets, tables->table_data);
+build_dmar_q35(tables->table_data, tables->linker);
+}
 
 /* Add tables supplied by user (if any) */
 for (u = acpi_table_first(); u; u = acpi_table_next(u)) {
diff --git a/hw/i386/acpi-defs.h b/hw/i386/acpi-defs.h
index e93babb..9674825 100644
--- a/hw/i386/acpi-defs.h
+++ b/hw/i386/acpi-defs.h
@@ -314,4 +314,74 @@ struct AcpiTableMcfg {
 } QEMU_PACKED;
 typedef struct AcpiTableMcfg AcpiTableMcfg;
 
+/* DMAR - DMA Remapping table r2.2 */
+struct AcpiTableDmar {
+ACPI_TABLE_HEADER_DEF
+uint8_t host_address_width; /* Maximum DMA physical addressability */
+uint8_t flags;
+uint8_t reserved[10];
+} QEMU_PACKED;
+typedef struct AcpiTableDmar AcpiTableDmar;
+
+/* Masks for Flags field above */
+#define ACPI_DMAR_INTR_REMAP(1)
+#define ACPI_DMAR_X2APIC_OPT_OUT(2)
+
+/*
+ * DMAR sub-structures (Follow DMA Remapping table)
+ */
+#define ACPI_DMAR_SUB_HEADER_DEF /* Common ACPI DMAR sub-structure header */\
+uint16_t type;  \
+uint16_t length;
+
+/* Values for sub-structure type for DMAR */
+enum {
+ACPI_DMAR_TYPE_HARDWARE_UNIT = 0,   /* DRHD */
+ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, /* RMRR */
+ACPI_DMAR_TYPE_ATSR = 2,/* ATSR */
+ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3,   /* RHSR */
+ACPI_DMAR_TYPE_ANDD = 4,/* ANDD */
+ACPI_DMAR_TYPE_RESERVED = 5 /* Reserved for furture use */
+};
+
+/*
+ * Sub-structures for DMAR, correspond to Type in ACPI_DMAR_SUB_HEADER_DEF
+ */
+
+/* DMAR Device Scope structures */
+struct AcpiDmarDeviceScope {
+uint8_t type;
+uint8_t length;
+uint16_t reserved;
+uint8_t enumeration_id;
+uint8_t start_bus_number;
+uint8_t path[0];
+} QEMU_PACKED;
+typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope;
+
+/* Values for type in struct AcpiDmarDeviceScope */
+enum {
+ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0,
+ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1,
+ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2,
+ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3,
+ACPI_DMAR_SCOPE_TYPE_HPET = 4,
+ACPI_DMAR_SCOPE_TYPE_ACPI = 5,
+ACPI_DMAR_SCOPE_TYPE_RESERVED = 6 /* Reserved for future use */
+};
+
+/* 0: Hardware Unit Definition */
+struct AcpiDmarHardwareUnit {
+ACPI_DMAR_SUB_HEADER_DEF
+uin

[Qemu-devel] [PATCH 1/3] intel-iommu: introduce Intel IOMMU (VT-d) emulation

2014-07-22 Thread Le Tan
Add support for emulating Intel IOMMU according to the VT-d specification for
the q35 chipset machine. Implement the logic for DMAR (DMA remapping) without
PASID support. Use register-based invalidation for context-cache invalidation
and IOTLB invalidation.
Basic fault reporting and caching are not implemented yet.

Signed-off-by: Le Tan 
---
 hw/i386/Makefile.objs |1 +
 hw/i386/intel_iommu.c | 1139 +
 include/hw/i386/intel_iommu.h |  350 +
 3 files changed, 1490 insertions(+)
 create mode 100644 hw/i386/intel_iommu.c
 create mode 100644 include/hw/i386/intel_iommu.h

diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index 48014ab..6936111 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -2,6 +2,7 @@ obj-$(CONFIG_KVM) += kvm/
 obj-y += multiboot.o smbios.o
 obj-y += pc.o pc_piix.o pc_q35.o
 obj-y += pc_sysfw.o
+obj-y += intel_iommu.o
 obj-$(CONFIG_XEN) += ../xenpv/ xen/
 
 obj-y += kvmvapic.o
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
new file mode 100644
index 000..3ba0e1e
--- /dev/null
+++ b/hw/i386/intel_iommu.c
@@ -0,0 +1,1139 @@
+/*
+ * QEMU emulation of an Intel IOMMU (VT-d)
+ *   (DMA Remapping device)
+ *
+ * Copyright (c) 2013 Knut Omang, Oracle 
+ * Copyright (C) 2014 Le Tan, 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "hw/sysbus.h"
+#include "exec/address-spaces.h"
+#include "hw/i386/intel_iommu.h"
+
+/* #define DEBUG_INTEL_IOMMU */
+#ifdef DEBUG_INTEL_IOMMU
+#define D(fmt, ...) \
+do { fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \
+ ## __VA_ARGS__); } while (0)
+#else
+#define D(fmt, ...) \
+do { } while (0)
+#endif
+
+
+static inline void define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
+uint64_t wmask, uint64_t w1cmask)
+{
+*((uint64_t *)&s->csr[addr]) = val;
+*((uint64_t *)&s->wmask[addr]) = wmask;
+*((uint64_t *)&s->w1cmask[addr]) = w1cmask;
+}
+
+static inline void define_quad_wo(IntelIOMMUState *s, hwaddr addr,
+  uint64_t mask)
+{
+*((uint64_t *)&s->womask[addr]) = mask;
+}
+
+static inline void define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val,
+uint32_t wmask, uint32_t w1cmask)
+{
+*((uint32_t *)&s->csr[addr]) = val;
+*((uint32_t *)&s->wmask[addr]) = wmask;
+*((uint32_t *)&s->w1cmask[addr]) = w1cmask;
+}
+
+static inline void define_long_wo(IntelIOMMUState *s, hwaddr addr,
+  uint32_t mask)
+{
+*((uint32_t *)&s->womask[addr]) = mask;
+}
+
+/* "External" get/set operations */
+static inline void set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val)
+{
+uint64_t oldval = *((uint64_t *)&s->csr[addr]);
+uint64_t wmask = *((uint64_t *)&s->wmask[addr]);
+uint64_t w1cmask = *((uint64_t *)&s->w1cmask[addr]);
+*((uint64_t *)&s->csr[addr]) =
+((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val);
+}
+
+static inline void set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val)
+{
+uint32_t oldval = *((uint32_t *)&s->csr[addr]);
+uint32_t wmask = *((uint32_t *)&s->wmask[addr]);
+uint32_t w1cmask = *((uint32_t *)&s->w1cmask[addr]);
+*((uint32_t *)&s->csr[addr]) =
+((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val);
+}
+
+static inline uint64_t get_quad(IntelIOMMUState *s, hwaddr addr)
+{
+uint64_t val = *((uint64_t *)&s->csr[addr]);
+uint64_t womask = *((uint64_t *)&s->womask[addr]);
+return val & ~womask;
+}
+
+
+static inline uint32_t get_long(IntelIOMMUState *s, hwaddr addr)
+{
+uint32_t val = *((uint32_t *)&s->csr[addr]);
+uint32_t womask = *((uint32_t *)&s->womask[addr]);
+return val & ~womask;
+}
+
+
+
+/* "Internal" get/set operations */
+static inline uint64_t __get_quad(IntelIOMMUState *s, hwaddr addr)
+{
+return *((uint64_t *)&s->csr[addr]);
+}
+
+static inline uint32_t __get_long(IntelIOMMUState *s, hwaddr addr)
+{
+return *((uint32_t *)&s->csr[ad

[Qemu-devel] [PATCH 0/3] intel-iommu: introduce Intel IOMMU (VT-d) emulation to q35 chipset

2014-07-22 Thread Le Tan
Hi,

These patches are intended to introduce Intel IOMMU (VT-d) emulation to q35
chipset. The major job in these patches is to add support for emulating Intel
IOMMU according to the VT-d specification, including basic responses to CSRs
accesses, the logic of DMAR (DMA remapping) and DMA memory address
translations.

Features implemented for now are:
1. Response to important CSRs accesses;
2. DMAR (DMA remapping) without PASID support;
3. Use register-based invalidation for IOTLB and context cache invalidation;
4. Add DMAR table to ACPI tables to expose VT-d to BIOS;
5. Add "-machine vtd=on|off" option to enable/disable VT-d;
6. Only one DMAR unit for all the devices of PCI Segment 0.

Testing:
1. L1 guest with Linux with intel_iommu=on can interact with VT-d and boot
smoothly, and I can see info about VT-d in log of kernel;
2. Run L1 with VT-d, L2 guest with Linux can boot smoothly withou PCI device
passthrough;
3. Run L1 with VT-d and "-soundhw ac97 (QEMU_AUDIO_DRV=none)", then assign the
sound card to L2; L2 can boot smoothly with legacy PCI assignment;
4. Jailhouse hypervisor seems to run smoothly for now (tested by Jan).
5. Run L1 with VT-d and e1000 network card, then assign e1000 to L2; L2 will be
STUCK when booting. This still remains unsolved now. As far as I know, I suppose
that the L2 crashes when doing e1000_probe(). The QEMU of L1 will dump
something with "KVM: entry failed, hardware error 0x0", and the KVM of host
will print "nested_vmx_exit_handled failed vm entry 7". Unlike assigning the
sound card, after being assigned to L2, there is no translation entry of e1000
through VT-d, which I think means that e1000 doesn't issue any DMA access during
the boot of L2. Sometimes the kernel of L2 will print "divide error" during
booting. Can someone help me with this? Any help is appreciated! :)
6. VFIO has not been tested yet and I will do this soon.

I have some questions want to consult here:
1. Now the struct IntelIOMMUState is a member of MCHPCIState. VT-d is registered
as TYPE_SYS_BUS_DEVICE but registers its configuration MemoryRegion as subregion
of mch->pci_address_space. Is this correct? Another thought comes to my mind is
using sysbus_mmio_map() to map the MemoryRegion of VT-d. But I am not sure. And
maybe there are more improper usage of the QOM.
2. For declaration of porinter of pointer, like VTDAddressSpace 
**address_spaces,
checkpatch.pl will warn that "ERROR: need consistent spacing around '*' 
(ctx:WxO)".
Is checkpatch.pl wrong? Then what is the proper declaration?

TODO:
1. Fix the bug of legacy PCI assignment;
2. Test VFIO;
3. Queued Invalidation;
4. Basic fault reporting;
5. Caching propertities of IOTLB;
6. Clear up codes related to migration.

Thanks very much!

Git trees:
https://github.com/tamlok/qemu/commits/q35-iommu-v2

Le Tan (3):
  intel-iommu: introduce Intel IOMMU (VT-d) emulation
  intel-iommu: add DMAR table to ACPI tables
  intel-iommu: add Intel IOMMU emulation to q35 and add a machine option
"vtd" as a switch

 hw/core/machine.c |   27 +-
 hw/i386/Makefile.objs |1 +
 hw/i386/acpi-build.c  |   41 ++
 hw/i386/acpi-defs.h   |   70 +++
 hw/i386/intel_iommu.c | 1139 +
 hw/pci-host/q35.c |   72 ++-
 include/hw/boards.h   |1 +
 include/hw/i386/intel_iommu.h |  350 +
 include/hw/pci-host/q35.h |2 +
 qemu-options.hx   |5 +-
 vl.c  |4 +
 11 files changed, 1703 insertions(+), 9 deletions(-)
 create mode 100644 hw/i386/intel_iommu.c
 create mode 100644 include/hw/i386/intel_iommu.h

-- 
1.9.1




[Qemu-devel] [PATCH 3/3] intel-iommu: add Intel IOMMU emulation to q35 and add a machine option "vtd" as a switch

2014-07-22 Thread Le Tan
Add Intel IOMMU emulation to q35 chipset and expose it to the guest.
1. Add a machine option. Users can use "-machine vtd=on|off" in the command
line to enable/disable Intel IOMMU. The default is off.
2. Accroding to the machine option, q35 will initialize the Intel IOMMU and
use pci_setup_iommu() to setup q35_host_dma_iommu() as the IOMMU function for
the pci bus.
3. q35_host_dma_iommu() will return different address space according to the
bus_num and devfn of the device.

Signed-off-by: Le Tan 
---
 hw/core/machine.c | 27 --
 hw/pci-host/q35.c | 72 +++
 include/hw/boards.h   |  1 +
 include/hw/pci-host/q35.h |  2 ++
 qemu-options.hx   |  5 +++-
 vl.c  |  4 +++
 6 files changed, 102 insertions(+), 9 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index cbba679..1be9ef2 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -235,6 +235,20 @@ static void machine_set_firmware(Object *obj, const char 
*value, Error **errp)
 ms->firmware = g_strdup(value);
 }
 
+static bool machine_get_vtd(Object *obj, Error **errp)
+{
+MachineState *ms = MACHINE(obj);
+
+return ms->vtd;
+}
+
+static void machine_set_vtd(Object *obj, bool value, Error **errp)
+{
+MachineState *ms = MACHINE(obj);
+
+ms->vtd = value;
+}
+
 static void machine_initfn(Object *obj)
 {
 object_property_add_str(obj, "accel",
@@ -270,10 +284,17 @@ static void machine_initfn(Object *obj)
  machine_set_dump_guest_core,
  NULL);
 object_property_add_bool(obj, "mem-merge",
- machine_get_mem_merge, machine_set_mem_merge, 
NULL);
-object_property_add_bool(obj, "usb", machine_get_usb, machine_set_usb, 
NULL);
+ machine_get_mem_merge,
+ machine_set_mem_merge, NULL);
+object_property_add_bool(obj, "usb",
+ machine_get_usb,
+ machine_set_usb, NULL);
 object_property_add_str(obj, "firmware",
-machine_get_firmware, machine_set_firmware, NULL);
+machine_get_firmware,
+machine_set_firmware, NULL);
+object_property_add_bool(obj, "vtd",
+ machine_get_vtd,
+ machine_set_vtd, NULL);
 }
 
 static void machine_finalize(Object *obj)
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index a0a3068..933f76e 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -347,6 +347,61 @@ static void mch_reset(DeviceState *qdev)
 mch_update(mch);
 }
 
+static AddressSpace *q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+IntelIOMMUState *s = opaque;
+VTDAddressSpace **pvtd_as;
+VTDAddressSpace *vtd_as;
+int bus_num = pci_bus_num(bus);
+
+assert(devfn >= 0);
+
+pvtd_as = s->address_spaces[bus_num];
+if (!pvtd_as) {
+/* No corresponding free() */
+pvtd_as = g_malloc0(sizeof(VTDAddressSpace *) *
+VTD_PCI_SLOT_MAX * VTD_PCI_FUNC_MAX);
+s->address_spaces[bus_num] = pvtd_as;
+}
+
+vtd_as = *(pvtd_as + devfn);
+if (!vtd_as) {
+vtd_as = g_malloc0(sizeof(*vtd_as));
+*(pvtd_as + devfn) = vtd_as;
+
+vtd_as->bus_num = bus_num;
+vtd_as->devfn = devfn;
+vtd_as->iommu_state = s;
+memory_region_init_iommu(&vtd_as->iommu, OBJECT(s), &s->iommu_ops,
+ "intel_iommu", UINT64_MAX);
+address_space_init(&vtd_as->as, &vtd_as->iommu, "intel_iommu");
+}
+
+return &vtd_as->as;
+}
+
+static void mch_init_dmar(MCHPCIState *mch)
+{
+Error *error = NULL;
+PCIBus *pci_bus = PCI_BUS(qdev_get_parent_bus(DEVICE(mch)));
+
+mch->iommu = INTEL_IOMMU_DEVICE(object_new(TYPE_INTEL_IOMMU_DEVICE));
+qdev_set_parent_bus(DEVICE(mch->iommu), sysbus_get_default());
+object_property_set_bool(OBJECT(mch->iommu), true, "realized", &error);
+
+if (error) {
+fprintf(stderr, "%s\n", error_get_pretty(error));
+error_free(error);
+return;
+}
+
+memory_region_add_subregion(mch->pci_address_space,
+Q35_HOST_BRIDGE_IOMMU_ADDR,
+&mch->iommu->csrmem);
+pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu);
+}
+
+
 static int mch_init(PCIDevice *d)
 {
 int i;
@@ -363,13 +418,20 @@ static int mch_init(PCIDevice *d)
 memory_region_add_subregion_overlap(mch->system_memory, 0xa,
 &mch->smram_region, 1);
 memory_region_set_enabled(&mch->smram_r

Re: [Qemu-devel] [PATCH 1/3] intel-iommu: introduce Intel IOMMU (VT-d) emulation

2014-07-22 Thread Le Tan
Hi Michael,
Thanks very much for your careful reviewing!

2014-07-23 4:05 GMT+08:00 Michael S. Tsirkin :
> On Tue, Jul 22, 2014 at 11:47:48PM +0800, Le Tan wrote:
>> Add support for emulating Intel IOMMU according to the VT-d specification for
>> the q35 chipset machine. Implement the logic for DMAR (DMA remapping) without
>> PASID support. Use register-based invalidation for context-cache invalidation
>> and IOTLB invalidation.
>> Basic fault reporting and caching are not implemented yet.
>>
>> Signed-off-by: Le Tan 
>> ---
>>  hw/i386/Makefile.objs |1 +
>>  hw/i386/intel_iommu.c | 1139 
>> +
>>  include/hw/i386/intel_iommu.h |  350 +
>>  3 files changed, 1490 insertions(+)
>>  create mode 100644 hw/i386/intel_iommu.c
>>  create mode 100644 include/hw/i386/intel_iommu.h
>>
>> diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
>> index 48014ab..6936111 100644
>> --- a/hw/i386/Makefile.objs
>> +++ b/hw/i386/Makefile.objs
>> @@ -2,6 +2,7 @@ obj-$(CONFIG_KVM) += kvm/
>>  obj-y += multiboot.o smbios.o
>>  obj-y += pc.o pc_piix.o pc_q35.o
>>  obj-y += pc_sysfw.o
>> +obj-y += intel_iommu.o
>>  obj-$(CONFIG_XEN) += ../xenpv/ xen/
>>
>>  obj-y += kvmvapic.o
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> new file mode 100644
>> index 000..3ba0e1e
>> --- /dev/null
>> +++ b/hw/i386/intel_iommu.c
>> @@ -0,0 +1,1139 @@
>> +/*
>> + * QEMU emulation of an Intel IOMMU (VT-d)
>> + *   (DMA Remapping device)
>> + *
>> + * Copyright (c) 2013 Knut Omang, Oracle 
>> + * Copyright (C) 2014 Le Tan, 
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> +
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> +
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  
>> USA
>> + */
>> +
>> +#include "hw/sysbus.h"
>> +#include "exec/address-spaces.h"
>> +#include "hw/i386/intel_iommu.h"
>> +
>> +/* #define DEBUG_INTEL_IOMMU */
>> +#ifdef DEBUG_INTEL_IOMMU
>> +#define D(fmt, ...) \
>> +do { fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \
>> + ## __VA_ARGS__); } while (0)
>> +#else
>> +#define D(fmt, ...) \
>> +do { } while (0)
>> +#endif
>> +
>
>
> Way too short for a macro name, might conflict with some
> header you include.
> You are polluting the global namespace.
> Best to prefix everything by INTEL_IOMMU_ and intel_iommu_.

Get it. Maybe I will use VTD as the prefix for this.

>
>> +
>> +static inline void define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t 
>> val,
>> +uint64_t wmask, uint64_t w1cmask)
>> +{
>> +*((uint64_t *)&s->csr[addr]) = val;
>> +*((uint64_t *)&s->wmask[addr]) = wmask;
>> +*((uint64_t *)&s->w1cmask[addr]) = w1cmask;
>> +}
>> +
>> +static inline void define_quad_wo(IntelIOMMUState *s, hwaddr addr,
>> +  uint64_t mask)
>> +{
>> +*((uint64_t *)&s->womask[addr]) = mask;
>> +}
>> +
>> +static inline void define_long(IntelIOMMUState *s, hwaddr addr, uint32_t 
>> val,
>> +uint32_t wmask, uint32_t w1cmask)
>> +{
>> +*((uint32_t *)&s->csr[addr]) = val;
>> +*((uint32_t *)&s->wmask[addr]) = wmask;
>> +*((uint32_t *)&s->w1cmask[addr]) = w1cmask;
>> +}
>> +
>> +static inline void define_long_wo(IntelIOMMUState *s, hwaddr addr,
>> +  uint32_t mask)
>> +{
>> +*((uint32_t *)&s->womask[addr]) = mask;
>> +}
>> +
>> +/* "External" get/set operations */
>> +static inline void set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val)
>> +{
>> +uint64_t oldval = *((uint64_t *)&s->csr[addr]);
>>

Re: [Qemu-devel] [PATCH 1/3] intel-iommu: introduce Intel IOMMU (VT-d) emulation

2014-07-23 Thread Le Tan
Hi Paolo,

2014-07-23 15:58 GMT+08:00 Paolo Bonzini :
> Il 22/07/2014 17:47, Le Tan ha scritto:
>> +static inline void define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t 
>> val,
>> +uint64_t wmask, uint64_t w1cmask)
>> +{
>> +*((uint64_t *)&s->csr[addr]) = val;
>
> All these casts are not endian-safe.  Please use ldl_le_p, ldq_le_p,
> stl_le_p, stq_le_p.

Thanks very much. Finally I got the idea here.:) Also thanks for your
renaming suggestions.

>> +*((uint64_t *)&s->wmask[addr]) = wmask;
>> +*((uint64_t *)&s->w1cmask[addr]) = w1cmask;
>> +}
>> +
>> +static inline void define_quad_wo(IntelIOMMUState *s, hwaddr addr,
>> +  uint64_t mask)
>> +{
>> +*((uint64_t *)&s->womask[addr]) = mask;
>> +}
>> +
>> +static inline void define_long(IntelIOMMUState *s, hwaddr addr, uint32_t 
>> val,
>> +uint32_t wmask, uint32_t w1cmask)
>> +{
>> +*((uint32_t *)&s->csr[addr]) = val;
>> +*((uint32_t *)&s->wmask[addr]) = wmask;
>> +*((uint32_t *)&s->w1cmask[addr]) = w1cmask;
>> +}
>> +
>> +static inline void define_long_wo(IntelIOMMUState *s, hwaddr addr,
>> +  uint32_t mask)
>> +{
>> +*((uint32_t *)&s->womask[addr]) = mask;
>> +}
>> +
>> +/* "External" get/set operations */
>> +static inline void set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val)
>> +{
>> +uint64_t oldval = *((uint64_t *)&s->csr[addr]);
>> +uint64_t wmask = *((uint64_t *)&s->wmask[addr]);
>> +uint64_t w1cmask = *((uint64_t *)&s->w1cmask[addr]);
>> +*((uint64_t *)&s->csr[addr]) =
>> +((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val);
>> +}
>> +
>> +static inline void set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val)
>> +{
>> +uint32_t oldval = *((uint32_t *)&s->csr[addr]);
>> +uint32_t wmask = *((uint32_t *)&s->wmask[addr]);
>> +uint32_t w1cmask = *((uint32_t *)&s->w1cmask[addr]);
>> +*((uint32_t *)&s->csr[addr]) =
>> +((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val);
>> +}
>> +
>> +static inline uint64_t get_quad(IntelIOMMUState *s, hwaddr addr)
>> +{
>> +uint64_t val = *((uint64_t *)&s->csr[addr]);
>> +uint64_t womask = *((uint64_t *)&s->womask[addr]);
>> +return val & ~womask;
>> +}
>> +
>> +
>> +static inline uint32_t get_long(IntelIOMMUState *s, hwaddr addr)
>> +{
>> +uint32_t val = *((uint32_t *)&s->csr[addr]);
>> +uint32_t womask = *((uint32_t *)&s->womask[addr]);
>> +return val & ~womask;
>> +}
>> +
>> +
>> +
>> +/* "Internal" get/set operations */
>> +static inline uint64_t __get_quad(IntelIOMMUState *s, hwaddr addr)
>
> get_quad_raw?
>
>> +{
>> +return *((uint64_t *)&s->csr[addr]);
>> +}
>> +
>> +static inline uint32_t __get_long(IntelIOMMUState *s, hwaddr addr)
>
> get_long_raw?
>
>> +{
>> +return *((uint32_t *)&s->csr[addr]);
>> +}
>> +
>> +
>> +/* val = (val & ~clear) | mask */
>> +static inline uint32_t set_mask_long(IntelIOMMUState *s, hwaddr addr,
>
> set_clear_long?
>
>> + uint32_t clear, uint32_t mask)
>> +{
>> +uint32_t *ptr = (uint32_t *)&s->csr[addr];
>> +uint32_t val = (*ptr & ~clear) | mask;
>> +*ptr = val;
>> +return val;
>> +}
>> +
>> +/* val = (val & ~clear) | mask */
>> +static inline uint64_t set_mask_quad(IntelIOMMUState *s, hwaddr addr,
>
> set_clear_quad?
>> + uint64_t clear, uint64_t mask)
>> +{
>> +uint64_t *ptr = (uint64_t *)&s->csr[addr];
>> +uint64_t val = (*ptr & ~clear) | mask;
>> +*ptr = val;
>> +return val;
>> +}
>> +
>> +
>

Regards,
Le



Re: [Qemu-devel] [PATCH 1/3] intel-iommu: introduce Intel IOMMU (VT-d) emulation

2014-07-23 Thread Le Tan
Hi Stefan,

2014-07-24 4:29 GMT+08:00 Stefan Weil :
> Am 22.07.2014 17:47, schrieb Le Tan:
>> Add support for emulating Intel IOMMU according to the VT-d specification for
>> the q35 chipset machine. Implement the logic for DMAR (DMA remapping) without
>> PASID support. Use register-based invalidation for context-cache invalidation
>> and IOTLB invalidation.
>> Basic fault reporting and caching are not implemented yet.
>>
>> Signed-off-by: Le Tan 
>> ---
>>  hw/i386/Makefile.objs |1 +
>>  hw/i386/intel_iommu.c | 1139 
>> +
>>  include/hw/i386/intel_iommu.h |  350 +
>>  3 files changed, 1490 insertions(+)
>>  create mode 100644 hw/i386/intel_iommu.c
>>  create mode 100644 include/hw/i386/intel_iommu.h
>>
> [...]
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> new file mode 100644
>> index 000..3ba0e1e
>> --- /dev/null
>> +++ b/hw/i386/intel_iommu.c
>> @@ -0,0 +1,1139 @@
>> +/*
>> + * QEMU emulation of an Intel IOMMU (VT-d)
>> + *   (DMA Remapping device)
>> + *
>> + * Copyright (c) 2013 Knut Omang, Oracle 
>> + * Copyright (C) 2014 Le Tan, 
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> +
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> +
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  
>> USA
>> + */
>> +
>
> I suggest replacing the FSF address here (and in other files) by the URL:
>
>  * You should have received a copy of the GNU General Public License along
>  * with this program; if not, see <http://www.gnu.org/licenses/>.
>
> This is the standard used for most GPL text in QEMU source files.

Get it. I copied it from the Linux kernel tree.
Thanks very much!

> Regards
> Stefan W.
>

Regards,
Le



[Qemu-devel] [PATCH v2 0/3] intel-iommu: introduce Intel IOMMU (VT-d) emulation to q35 chipset

2014-07-27 Thread Le Tan
Hi,

These patches are intended to introduce Intel IOMMU (VT-d) emulation to q35
chipset. The major job in these patches is to add support for emulating Intel
IOMMU according to the VT-d specification, including basic responses to CSRs
accesses, the logic of DMAR (DMA remapping) and DMA memory address
translations.

Features implemented for now are:
1. Response to important CSRs accesses;
2. DMAR (DMA remapping) without PASID support;
3. Use register-based invalidation for IOTLB and context cache invalidation;
4. Add DMAR table to ACPI tables to expose VT-d to BIOS;
5. Add "-machine iommu=on|off" option to enable/disable VT-d;
6. Only one DMAR unit for all the devices of PCI Segment 0.

Testing:
1. L1 guest with Linux with intel_iommu=on can interact with VT-d and boot
smoothly, and I can see info about VT-d in log of kernel;
2. Run L1 with VT-d, L2 guest with Linux can boot smoothly withou PCI device
passthrough;
3. Run L1 with VT-d and "-soundhw ac97 (QEMU_AUDIO_DRV=none)", then assign the
sound card to L2; L2 can boot smoothly with legacy PCI assignment;
4. Jailhouse hypervisor seems to run smoothly for now (tested by Jan).
5. Run L1 with VT-d and e1000 network card, then assign e1000 to L2; L2 will be
STUCK when booting. This still remains unsolved now. As far as I know, I suppose
that the L2 crashes when doing e1000_probe(). The QEMU of L1 will dump
something with "KVM: entry failed, hardware error 0x0", and the KVM of host
will print "nested_vmx_exit_handled failed vm entry 7". Unlike assigning the
sound card, after being assigned to L2, there is no translation entry of e1000
through VT-d, which I think means that e1000 doesn't issue any DMA access during
the boot of L2. Sometimes the kernel of L2 will print "divide error" during
booting. Can someone help me with this? Any help is appreciated! :)
6. VFIO is tested and is the same as legacy pci assignment.

I have some questions want to consult here:
1. Now the struct IntelIOMMUState is a member of MCHPCIState. VT-d is registered
as TYPE_SYS_BUS_DEVICE but registers its configuration MemoryRegion as subregion
of mch->pci_address_space. Is this correct? Another thought comes to my mind is
using sysbus_mmio_map() to map the MemoryRegion of VT-d. But I am not sure. And
maybe there are more improper usage of the QOM.
2. For declaration of porinter of pointer, like VTDAddressSpace 
**address_spaces,
checkpatch.pl will warn that "ERROR: need consistent spacing around '*' 
(ctx:WxO)".
Is checkpatch.pl wrong?

TODO:
1. Fix the bug of legacy PCI assignment;
2. Clear up codes related to migration.
3. Queued Invalidation;
4. Basic fault reporting;
5. Caching propertities of IOTLB;

Changes since v1:
*address reviewing suggestions given by Michael, Paolo, Stefan and Jan
-split intel_iommu.h to include/hw/i386/intel_iommu.h and
 hw/i386/intel_iommu_internal.h
-change the copyright information
-change D() to VTD_DPRINTF()
-remove dead code
-rename constant definitions with consistent prefix VTD_
-rename some struct definitions according to QEMU standard
-rename some CSRs access functions
-use endian-save functions to access CSRs
-change machine option to "iommu=on|off"

Thanks very much!

Git trees:
https://github.com/tamlok/qemu

Le Tan (3):
  intel-iommu: introduce Intel IOMMU (VT-d) emulation
  intel-iommu: add DMAR table to ACPI tables
  intel-iommu: add Intel IOMMU emulation to q35 and add a machine option
"iommu" as a switch

 hw/core/machine.c  |  27 +-
 hw/i386/Makefile.objs  |   1 +
 hw/i386/acpi-build.c   |  41 ++
 hw/i386/acpi-defs.h|  70 
 hw/i386/intel_iommu.c  | 911 +
 hw/i386/intel_iommu_internal.h | 257 
 hw/pci-host/q35.c  |  72 +++-
 include/hw/boards.h|   1 +
 include/hw/i386/intel_iommu.h  |  75 
 include/hw/pci-host/q35.h  |   2 +
 qemu-options.hx|   5 +-
 vl.c   |   4 +
 12 files changed, 1457 insertions(+), 9 deletions(-)
 create mode 100644 hw/i386/intel_iommu.c
 create mode 100644 hw/i386/intel_iommu_internal.h
 create mode 100644 include/hw/i386/intel_iommu.h

-- 
1.9.1




[Qemu-devel] [PATCH v2 1/3] intel-iommu: introduce Intel IOMMU (VT-d) emulation

2014-07-27 Thread Le Tan
Add support for emulating Intel IOMMU according to the VT-d specification for
the q35 chipset machine. Implement the logic for DMAR (DMA remapping) without
PASID support. Use register-based invalidation for context-cache invalidation
and IOTLB invalidation.
Basic fault reporting and caching are not implemented yet.

Signed-off-by: Le Tan 
---
 hw/i386/Makefile.objs  |   1 +
 hw/i386/intel_iommu.c  | 911 +
 hw/i386/intel_iommu_internal.h | 257 
 include/hw/i386/intel_iommu.h  |  75 
 4 files changed, 1244 insertions(+)
 create mode 100644 hw/i386/intel_iommu.c
 create mode 100644 hw/i386/intel_iommu_internal.h
 create mode 100644 include/hw/i386/intel_iommu.h

diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index 48014ab..6936111 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -2,6 +2,7 @@ obj-$(CONFIG_KVM) += kvm/
 obj-y += multiboot.o smbios.o
 obj-y += pc.o pc_piix.o pc_q35.o
 obj-y += pc_sysfw.o
+obj-y += intel_iommu.o
 obj-$(CONFIG_XEN) += ../xenpv/ xen/
 
 obj-y += kvmvapic.o
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
new file mode 100644
index 000..718993a
--- /dev/null
+++ b/hw/i386/intel_iommu.c
@@ -0,0 +1,911 @@
+/*
+ * QEMU emulation of an Intel IOMMU (VT-d)
+ *   (DMA Remapping device)
+ *
+ * Copyright (C) 2013 Knut Omang, Oracle 
+ * Copyright (C) 2014 Le Tan, 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/sysbus.h"
+#include "exec/address-spaces.h"
+#include "intel_iommu_internal.h"
+
+
+/*#define DEBUG_INTEL_IOMMU*/
+#ifdef DEBUG_INTEL_IOMMU
+#define VTD_DPRINTF(fmt, ...) \
+do { fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \
+ ## __VA_ARGS__); } while (0)
+#else
+#define VTD_DPRINTF(fmt, ...) \
+do { } while (0)
+#endif
+
+static inline void define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
+   uint64_t wmask, uint64_t w1cmask)
+{
+stq_le_p(&s->csr[addr], val);
+stq_le_p(&s->wmask[addr], wmask);
+stq_le_p(&s->w1cmask[addr], w1cmask);
+}
+
+static inline void define_quad_wo(IntelIOMMUState *s, hwaddr addr,
+  uint64_t mask)
+{
+stq_le_p(&s->womask[addr], mask);
+}
+
+static inline void define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val,
+   uint32_t wmask, uint32_t w1cmask)
+{
+stl_le_p(&s->csr[addr], val);
+stl_le_p(&s->wmask[addr], wmask);
+stl_le_p(&s->w1cmask[addr], w1cmask);
+}
+
+static inline void define_long_wo(IntelIOMMUState *s, hwaddr addr,
+  uint32_t mask)
+{
+stl_le_p(&s->womask[addr], mask);
+}
+
+/* "External" get/set operations */
+static inline void set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val)
+{
+uint64_t oldval = ldq_le_p(&s->csr[addr]);
+uint64_t wmask = ldq_le_p(&s->wmask[addr]);
+uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
+stq_le_p(&s->csr[addr],
+ ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
+}
+
+static inline void set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val)
+{
+uint32_t oldval = ldl_le_p(&s->csr[addr]);
+uint32_t wmask = ldl_le_p(&s->wmask[addr]);
+uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
+stl_le_p(&s->csr[addr],
+ ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
+}
+
+static inline uint64_t get_quad(IntelIOMMUState *s, hwaddr addr)
+{
+uint64_t val = ldq_le_p(&s->csr[addr]);
+uint64_t womask = ldq_le_p(&s->womask[addr]);
+return val & ~womask;
+}
+
+
+static inline uint32_t get_long(IntelIOMMUState *s, hwaddr addr)
+{
+uint32_t val = ldl_le_p(&s->csr[addr]);
+uint32_t womask = ldl_le_p(&s->womask[addr]);
+return val & ~womask;
+}
+
+/* "Internal" get/set operations */
+static inline uint64_t get_quad_raw(IntelIOMMUState *s, hwaddr addr)
+{
+return ldq_le_p(&s->csr[addr]);
+}
+
+static inline uint32_t get_long_raw(IntelIOMMUState *s, hwaddr addr)
+{
+return ldl_le_p(&s->csr[addr]);
+}
+
+static inline uint32_t set_clear_mask_l

[Qemu-devel] [PATCH v2 2/3] intel-iommu: add DMAR table to ACPI tables

2014-07-27 Thread Le Tan
Expose Intel IOMMU to the BIOS. If object of TYPE_INTEL_IOMMU_DEVICE exists,
add DMAR table to ACPI RSDT table. For now the DMAR table indicates that there
is only one hardware unit without INTR_REMAP capability on the platform.

Signed-off-by: Le Tan 
---
 hw/i386/acpi-build.c | 41 ++
 hw/i386/acpi-defs.h  | 70 
 2 files changed, 111 insertions(+)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index ebc5f03..8241621 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -45,6 +45,7 @@
 #include "hw/i386/ich9.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/pci-host/q35.h"
+#include "hw/i386/intel_iommu.h"
 
 #include "hw/i386/q35-acpi-dsdt.hex"
 #include "hw/i386/acpi-dsdt.hex"
@@ -1316,6 +1317,31 @@ build_mcfg_q35(GArray *table_data, GArray *linker, 
AcpiMcfgInfo *info)
 }
 
 static void
+build_dmar_q35(GArray *table_data, GArray *linker)
+{
+int dmar_start = table_data->len;
+
+AcpiTableDmar *dmar;
+AcpiDmarHardwareUnit *drhd;
+
+dmar = acpi_data_push(table_data, sizeof(*dmar));
+dmar->host_address_width = 0x26;/* 0x26 + 1 = 39 */
+dmar->flags = 0;/* No intr_remap for now */
+
+/* DMAR Remapping Hardware Unit Definition structure */
+drhd = acpi_data_push(table_data, sizeof(*drhd));
+drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT);
+drhd->length = cpu_to_le16(sizeof(*drhd));   /* No device scope now */
+drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL;
+drhd->pci_segment = cpu_to_le16(0);
+drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR);
+
+build_header(linker, table_data, (void *)(table_data->data + dmar_start),
+ "DMAR", table_data->len - dmar_start, 1);
+}
+
+
+static void
 build_dsdt(GArray *table_data, GArray *linker, AcpiMiscInfo *misc)
 {
 AcpiTableHeader *dsdt;
@@ -1436,6 +1462,17 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg)
 return true;
 }
 
+static bool acpi_has_iommu(void)
+{
+bool ambiguous;
+Object *intel_iommu;
+
+intel_iommu = object_resolve_path_type("", TYPE_INTEL_IOMMU_DEVICE,
+   &ambiguous);
+return intel_iommu && !ambiguous;
+}
+
+
 static
 void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables)
 {
@@ -1497,6 +1534,10 @@ void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables 
*tables)
 acpi_add_table(table_offsets, tables->table_data);
 build_mcfg_q35(tables->table_data, tables->linker, &mcfg);
 }
+if (acpi_has_iommu()) {
+acpi_add_table(table_offsets, tables->table_data);
+build_dmar_q35(tables->table_data, tables->linker);
+}
 
 /* Add tables supplied by user (if any) */
 for (u = acpi_table_first(); u; u = acpi_table_next(u)) {
diff --git a/hw/i386/acpi-defs.h b/hw/i386/acpi-defs.h
index e93babb..9674825 100644
--- a/hw/i386/acpi-defs.h
+++ b/hw/i386/acpi-defs.h
@@ -314,4 +314,74 @@ struct AcpiTableMcfg {
 } QEMU_PACKED;
 typedef struct AcpiTableMcfg AcpiTableMcfg;
 
+/* DMAR - DMA Remapping table r2.2 */
+struct AcpiTableDmar {
+ACPI_TABLE_HEADER_DEF
+uint8_t host_address_width; /* Maximum DMA physical addressability */
+uint8_t flags;
+uint8_t reserved[10];
+} QEMU_PACKED;
+typedef struct AcpiTableDmar AcpiTableDmar;
+
+/* Masks for Flags field above */
+#define ACPI_DMAR_INTR_REMAP(1)
+#define ACPI_DMAR_X2APIC_OPT_OUT(2)
+
+/*
+ * DMAR sub-structures (Follow DMA Remapping table)
+ */
+#define ACPI_DMAR_SUB_HEADER_DEF /* Common ACPI DMAR sub-structure header */\
+uint16_t type;  \
+uint16_t length;
+
+/* Values for sub-structure type for DMAR */
+enum {
+ACPI_DMAR_TYPE_HARDWARE_UNIT = 0,   /* DRHD */
+ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, /* RMRR */
+ACPI_DMAR_TYPE_ATSR = 2,/* ATSR */
+ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3,   /* RHSR */
+ACPI_DMAR_TYPE_ANDD = 4,/* ANDD */
+ACPI_DMAR_TYPE_RESERVED = 5 /* Reserved for furture use */
+};
+
+/*
+ * Sub-structures for DMAR, correspond to Type in ACPI_DMAR_SUB_HEADER_DEF
+ */
+
+/* DMAR Device Scope structures */
+struct AcpiDmarDeviceScope {
+uint8_t type;
+uint8_t length;
+uint16_t reserved;
+uint8_t enumeration_id;
+uint8_t start_bus_number;
+uint8_t path[0];
+} QEMU_PACKED;
+typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope;
+
+/* Values for type in struct AcpiDmarDeviceScope */
+enum {
+ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0,
+ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1,
+ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2,
+ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3,
+ACPI_DMAR_SCOPE_TYPE_HPET = 4,
+ACPI_DMAR_SCOPE_TYPE_ACPI = 5,
+ACPI_DMAR_SCOPE_TYPE_RESERVED = 6 /* Reserved for future use */
+};
+
+/* 0: Hardware Unit Definition */
+struct AcpiDmarHardwareUnit {
+ACPI_DMAR_SUB_HEADER_DEF
+uin

[Qemu-devel] [PATCH v2 3/3] intel-iommu: add Intel IOMMU emulation to q35 and add a machine option "iommu" as a switch

2014-07-27 Thread Le Tan
Add Intel IOMMU emulation to q35 chipset and expose it to the guest.
1. Add a machine option. Users can use "-machine iommu=on|off" in the command
line to enable/disable Intel IOMMU. The default is off.
2. Accroding to the machine option, q35 will initialize the Intel IOMMU and
use pci_setup_iommu() to setup q35_host_dma_iommu() as the IOMMU function for
the pci bus.
3. q35_host_dma_iommu() will return different address space according to the
bus_num and devfn of the device.

Signed-off-by: Le Tan 
---
 hw/core/machine.c | 27 --
 hw/pci-host/q35.c | 72 +++
 include/hw/boards.h   |  1 +
 include/hw/pci-host/q35.h |  2 ++
 qemu-options.hx   |  5 +++-
 vl.c  |  4 +++
 6 files changed, 102 insertions(+), 9 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index cbba679..9b166e5 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -235,6 +235,20 @@ static void machine_set_firmware(Object *obj, const char 
*value, Error **errp)
 ms->firmware = g_strdup(value);
 }
 
+static bool machine_get_iommu(Object *obj, Error **errp)
+{
+MachineState *ms = MACHINE(obj);
+
+return ms->iommu;
+}
+
+static void machine_set_iommu(Object *obj, bool value, Error **errp)
+{
+MachineState *ms = MACHINE(obj);
+
+ms->iommu = value;
+}
+
 static void machine_initfn(Object *obj)
 {
 object_property_add_str(obj, "accel",
@@ -270,10 +284,17 @@ static void machine_initfn(Object *obj)
  machine_set_dump_guest_core,
  NULL);
 object_property_add_bool(obj, "mem-merge",
- machine_get_mem_merge, machine_set_mem_merge, 
NULL);
-object_property_add_bool(obj, "usb", machine_get_usb, machine_set_usb, 
NULL);
+ machine_get_mem_merge,
+ machine_set_mem_merge, NULL);
+object_property_add_bool(obj, "usb",
+ machine_get_usb,
+ machine_set_usb, NULL);
 object_property_add_str(obj, "firmware",
-machine_get_firmware, machine_set_firmware, NULL);
+machine_get_firmware,
+machine_set_firmware, NULL);
+object_property_add_bool(obj, "iommu",
+ machine_get_iommu,
+ machine_set_iommu, NULL);
 }
 
 static void machine_finalize(Object *obj)
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index a0a3068..4abd0ee 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -347,6 +347,61 @@ static void mch_reset(DeviceState *qdev)
 mch_update(mch);
 }
 
+static AddressSpace *q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+IntelIOMMUState *s = opaque;
+VTDAddressSpace **pvtd_as;
+VTDAddressSpace *vtd_as;
+int bus_num = pci_bus_num(bus);
+
+assert(devfn >= 0);
+
+pvtd_as = s->address_spaces[bus_num];
+if (!pvtd_as) {
+/* No corresponding free() */
+pvtd_as = g_malloc0(sizeof(VTDAddressSpace *) *
+VTD_PCI_SLOT_MAX * VTD_PCI_FUNC_MAX);
+s->address_spaces[bus_num] = pvtd_as;
+}
+
+vtd_as = *(pvtd_as + devfn);
+if (!vtd_as) {
+vtd_as = g_malloc0(sizeof(*vtd_as));
+*(pvtd_as + devfn) = vtd_as;
+
+vtd_as->bus_num = bus_num;
+vtd_as->devfn = devfn;
+vtd_as->iommu_state = s;
+memory_region_init_iommu(&vtd_as->iommu, OBJECT(s), &s->iommu_ops,
+ "intel_iommu", UINT64_MAX);
+address_space_init(&vtd_as->as, &vtd_as->iommu, "intel_iommu");
+}
+
+return &vtd_as->as;
+}
+
+static void mch_init_dmar(MCHPCIState *mch)
+{
+Error *error = NULL;
+PCIBus *pci_bus = PCI_BUS(qdev_get_parent_bus(DEVICE(mch)));
+
+mch->iommu = INTEL_IOMMU_DEVICE(object_new(TYPE_INTEL_IOMMU_DEVICE));
+qdev_set_parent_bus(DEVICE(mch->iommu), sysbus_get_default());
+object_property_set_bool(OBJECT(mch->iommu), true, "realized", &error);
+
+if (error) {
+fprintf(stderr, "%s\n", error_get_pretty(error));
+error_free(error);
+return;
+}
+
+memory_region_add_subregion(mch->pci_address_space,
+Q35_HOST_BRIDGE_IOMMU_ADDR,
+&mch->iommu->csrmem);
+pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu);
+}
+
+
 static int mch_init(PCIDevice *d)
 {
 int i;
@@ -363,13 +418,20 @@ static int mch_init(PCIDevice *d)
 memory_region_add_subregion_overlap(mch->system_memory, 0xa,
 &mch->smram_region, 1);
 memory_region_set_enabled(&am

[Qemu-devel] vfio in the guest: no available reset mechanism

2014-07-30 Thread Le Tan
Hi,
I am testing vfio in L1 with my VT-d emulation project. I assigned one
of the two AHCI controllers in L1 to L2 via vfio. After I ran the QEMU
in L1, it complains that:
qemu-system-x86_64: vfio: Cannot reset device :00:03.0, no
available reset mechanism.
qemu-system-x86_64: vfio: Cannot reset device :00:03.0, no
available reset mechanism.

Then L2 paused when the SeaBIOS executed in ahci_controller_setup(). I
look into this and found that:
val = ahci_ctrl_readl(ctrl, HOST_CTL);
ahci_ctrl_writel(ctrl, HOST_CTL, val | HOST_CTL_AHCI_EN);
When the BIOS tried to read the HOST_CTL, it returns 0x8002, which
bit 2 (Interrupt Enable) is 1. The AHCI manual says that this bit
should be cleared by default. So maybe L1 didn't reset the device
before assigning it to L2?
Then the BIOS tried to write back to HOST_CTL and it was stuck here. :(

So can anyone give me some advice? About the state of PCI device or
bus-level reset?

Here is the detail of the environment and the way I did the vfio.
1. lspci in L1 said:
00:03.0 SATA controller [0106]: Intel Corporation 82801IR/IO/IH
(ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] [8086:2922] (rev 02)
00:1f.0 ISA bridge [0601]: Intel Corporation 82801IB (ICH9) LPC
Interface Controller [8086:2918] (rev 02)
00:1f.2 SATA controller [0106]: Intel Corporation 82801IR/IO/IH
(ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] [8086:2922] (rev 02)
00:1f.3 SMBus [0c05]: Intel Corporation 82801I (ICH9 Family) SMBus
Controller [8086:2930] (rev 02)
2. Unbind 00:03.0 and do vfio:
modprobe -r vfio_iommu_type1
modprobe vfio_iommu_type1 allow_unsafe_interrupts=1
modprobe vfio-pci
echo :00:03.0 > /sys/bus/pci/devices/\:00\:03.0/driver/unbind
echo "8086 2922" > /sys/bus/pci/drivers/vfio-pci/new_id
3. run L2 with "-device vfio-pci,host=00:03.0"

Any help is appreciated! Thanks very much!

Regards,
Le



Re: [Qemu-devel] vfio in the guest: no available reset mechanism

2014-07-30 Thread Le Tan
Hi Michael,

2014-07-30 21:16 GMT+08:00 Michael S. Tsirkin :
> On Wed, Jul 30, 2014 at 08:24:04PM +0800, Le Tan wrote:
>> Hi,
>> I am testing vfio in L1 with my VT-d emulation project. I assigned one
>> of the two AHCI controllers in L1 to L2 via vfio. After I ran the QEMU
>> in L1, it complains that:
>> qemu-system-x86_64: vfio: Cannot reset device :00:03.0, no
>> available reset mechanism.
>> qemu-system-x86_64: vfio: Cannot reset device :00:03.0, no
>> available reset mechanism.
>>
>> Then L2 paused when the SeaBIOS executed in ahci_controller_setup(). I
>> look into this and found that:
>> val = ahci_ctrl_readl(ctrl, HOST_CTL);
>> ahci_ctrl_writel(ctrl, HOST_CTL, val | HOST_CTL_AHCI_EN);
>> When the BIOS tried to read the HOST_CTL, it returns 0x8002, which
>> bit 2 (Interrupt Enable) is 1. The AHCI manual says that this bit
>> should be cleared by default. So maybe L1 didn't reset the device
>> before assigning it to L2?
>> Then the BIOS tried to write back to HOST_CTL and it was stuck here. :(
>>
>> So can anyone give me some advice? About the state of PCI device or
>> bus-level reset?
>>
>> Here is the detail of the environment and the way I did the vfio.
>> 1. lspci in L1 said:
>> 00:03.0 SATA controller [0106]: Intel Corporation 82801IR/IO/IH
>> (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] [8086:2922] (rev 02)
>> 00:1f.0 ISA bridge [0601]: Intel Corporation 82801IB (ICH9) LPC
>> Interface Controller [8086:2918] (rev 02)
>> 00:1f.2 SATA controller [0106]: Intel Corporation 82801IR/IO/IH
>> (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] [8086:2922] (rev 02)
>> 00:1f.3 SMBus [0c05]: Intel Corporation 82801I (ICH9 Family) SMBus
>> Controller [8086:2930] (rev 02)
>> 2. Unbind 00:03.0 and do vfio:
>> modprobe -r vfio_iommu_type1
>> modprobe vfio_iommu_type1 allow_unsafe_interrupts=1
>> modprobe vfio-pci
>> echo :00:03.0 > /sys/bus/pci/devices/\:00\:03.0/driver/unbind
>> echo "8086 2922" > /sys/bus/pci/drivers/vfio-pci/new_id
>> 3. run L2 with "-device vfio-pci,host=00:03.0"
>>
>> Any help is appreciated! Thanks very much!
>>
>> Regards,
>> Le
>
> Clearly, bus level reset can't work for the root bus :)

Thanks very much!
I test the vfio with a second-bus ahci controller and it didn't
complain about the lack of reset mechanism. :) And the return val of
HOST_CTL is normal now (the same as emulated ahci controller).
However, it still paused when the BIOS tried to write to the HOST_CTL.
Do you have any idea?
And we should just test vfio and legacy pci-assignment with second bus
devices, not considering the root-bus devices?
Thanks again!

Regards,
Le

> --
> MST



Re: [Qemu-devel] vfio in the guest: no available reset mechanism

2014-07-31 Thread Le Tan
Hi Alex,

2014-07-30 22:46 GMT+08:00 Alex Williamson :
> On Wed, 2014-07-30 at 22:16 +0800, Le Tan wrote:
>> Hi Michael,
>>
>> 2014-07-30 21:16 GMT+08:00 Michael S. Tsirkin :
>> > On Wed, Jul 30, 2014 at 08:24:04PM +0800, Le Tan wrote:
>> >> Hi,
>> >> I am testing vfio in L1 with my VT-d emulation project. I assigned one
>> >> of the two AHCI controllers in L1 to L2 via vfio. After I ran the QEMU
>> >> in L1, it complains that:
>> >> qemu-system-x86_64: vfio: Cannot reset device :00:03.0, no
>> >> available reset mechanism.
>> >> qemu-system-x86_64: vfio: Cannot reset device :00:03.0, no
>> >> available reset mechanism.
>> >>
>> >> Then L2 paused when the SeaBIOS executed in ahci_controller_setup(). I
>> >> look into this and found that:
>> >> val = ahci_ctrl_readl(ctrl, HOST_CTL);
>> >> ahci_ctrl_writel(ctrl, HOST_CTL, val | HOST_CTL_AHCI_EN);
>> >> When the BIOS tried to read the HOST_CTL, it returns 0x8002, which
>> >> bit 2 (Interrupt Enable) is 1. The AHCI manual says that this bit
>> >> should be cleared by default. So maybe L1 didn't reset the device
>> >> before assigning it to L2?
>> >> Then the BIOS tried to write back to HOST_CTL and it was stuck here. :(
>> >>
>> >> So can anyone give me some advice? About the state of PCI device or
>> >> bus-level reset?
>> >>
>> >> Here is the detail of the environment and the way I did the vfio.
>> >> 1. lspci in L1 said:
>> >> 00:03.0 SATA controller [0106]: Intel Corporation 82801IR/IO/IH
>> >> (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] [8086:2922] (rev 02)
>> >> 00:1f.0 ISA bridge [0601]: Intel Corporation 82801IB (ICH9) LPC
>> >> Interface Controller [8086:2918] (rev 02)
>> >> 00:1f.2 SATA controller [0106]: Intel Corporation 82801IR/IO/IH
>> >> (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] [8086:2922] (rev 02)
>> >> 00:1f.3 SMBus [0c05]: Intel Corporation 82801I (ICH9 Family) SMBus
>> >> Controller [8086:2930] (rev 02)
>> >> 2. Unbind 00:03.0 and do vfio:
>> >> modprobe -r vfio_iommu_type1
>> >> modprobe vfio_iommu_type1 allow_unsafe_interrupts=1
>> >> modprobe vfio-pci
>> >> echo :00:03.0 > /sys/bus/pci/devices/\:00\:03.0/driver/unbind
>> >> echo "8086 2922" > /sys/bus/pci/drivers/vfio-pci/new_id
>> >> 3. run L2 with "-device vfio-pci,host=00:03.0"
>> >>
>> >> Any help is appreciated! Thanks very much!
>> >>
>> >> Regards,
>> >> Le
>> >
>> > Clearly, bus level reset can't work for the root bus :)
>>
>> Thanks very much!
>> I test the vfio with a second-bus ahci controller and it didn't
>> complain about the lack of reset mechanism. :) And the return val of
>> HOST_CTL is normal now (the same as emulated ahci controller).
>> However, it still paused when the BIOS tried to write to the HOST_CTL.
>> Do you have any idea?
>> And we should just test vfio and legacy pci-assignment with second bus
>> devices, not considering the root-bus devices?
>
> AHCI seems like a poor choice of device for this work, they typically
> don't support any kind of reset and they can be troublesome even for the
> L1 assignment.  You really want something with FLR support so that both
> the host and L1 guest can potentially reset the device.  That said, you
> may still run into bugs with a L1 guest directed FLR.  Thanks,
>
> Alex
>

So what device do you think is suitable for the pci-assign test? e1000?
I just tested it with sound card ac97 and USB controller. But I don't
know how to attach them to a pcie-to-pci bridge, so maybe they weren't
reset before being assigned to L2. But it seems that they can work.
1. With the sound card, I assigned it to L2 via both vfio and legacy
pci-assign and I could hear the music played in L2 from host's
speakers. Of course, the vfio also complained about the lack of reset
mechanism.
2. With the USB controller, I used "-usb -usbdevice disk:file" to
attach a USB disk to L1. But there were 4 related devices in L1, so I
didn't know what should be assigned to L2. the info qtree was like
this:
 dev: ich9-usb-uhci3, id ""
masterbus = "usb-bus.0"
firstport = 4 (0x4)
bandwidth = 1280 (0x500)
maxframes = 128 (0x80)
addr = 1d.2
dev: ich9-usb-uhci2, id ""
masterbus = "usb-bus.0"
firstport = 2 (0x2)

Re: [Qemu-devel] vfio in the guest: no available reset mechanism

2014-08-01 Thread Le Tan
2014-08-01 23:25 GMT+08:00 Alex Williamson :
> On Fri, 2014-08-01 at 09:35 +0800, Le Tan wrote:
>> Hi Alex,
>>
>> 2014-07-30 22:46 GMT+08:00 Alex Williamson :
>> > On Wed, 2014-07-30 at 22:16 +0800, Le Tan wrote:
>> >> Hi Michael,
>> >>
>> >> 2014-07-30 21:16 GMT+08:00 Michael S. Tsirkin :
>> >> > On Wed, Jul 30, 2014 at 08:24:04PM +0800, Le Tan wrote:
>> >> >> Hi,
>> >> >> I am testing vfio in L1 with my VT-d emulation project. I assigned one
>> >> >> of the two AHCI controllers in L1 to L2 via vfio. After I ran the QEMU
>> >> >> in L1, it complains that:
>> >> >> qemu-system-x86_64: vfio: Cannot reset device :00:03.0, no
>> >> >> available reset mechanism.
>> >> >> qemu-system-x86_64: vfio: Cannot reset device :00:03.0, no
>> >> >> available reset mechanism.
>> >> >>
>> >> >> Then L2 paused when the SeaBIOS executed in ahci_controller_setup(). I
>> >> >> look into this and found that:
>> >> >> val = ahci_ctrl_readl(ctrl, HOST_CTL);
>> >> >> ahci_ctrl_writel(ctrl, HOST_CTL, val | HOST_CTL_AHCI_EN);
>> >> >> When the BIOS tried to read the HOST_CTL, it returns 0x8002, which
>> >> >> bit 2 (Interrupt Enable) is 1. The AHCI manual says that this bit
>> >> >> should be cleared by default. So maybe L1 didn't reset the device
>> >> >> before assigning it to L2?
>> >> >> Then the BIOS tried to write back to HOST_CTL and it was stuck here. :(
>> >> >>
>> >> >> So can anyone give me some advice? About the state of PCI device or
>> >> >> bus-level reset?
>> >> >>
>> >> >> Here is the detail of the environment and the way I did the vfio.
>> >> >> 1. lspci in L1 said:
>> >> >> 00:03.0 SATA controller [0106]: Intel Corporation 82801IR/IO/IH
>> >> >> (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] [8086:2922] (rev 02)
>> >> >> 00:1f.0 ISA bridge [0601]: Intel Corporation 82801IB (ICH9) LPC
>> >> >> Interface Controller [8086:2918] (rev 02)
>> >> >> 00:1f.2 SATA controller [0106]: Intel Corporation 82801IR/IO/IH
>> >> >> (ICH9R/DO/DH) 6 port SATA Controller [AHCI mode] [8086:2922] (rev 02)
>> >> >> 00:1f.3 SMBus [0c05]: Intel Corporation 82801I (ICH9 Family) SMBus
>> >> >> Controller [8086:2930] (rev 02)
>> >> >> 2. Unbind 00:03.0 and do vfio:
>> >> >> modprobe -r vfio_iommu_type1
>> >> >> modprobe vfio_iommu_type1 allow_unsafe_interrupts=1
>> >> >> modprobe vfio-pci
>> >> >> echo :00:03.0 > /sys/bus/pci/devices/\:00\:03.0/driver/unbind
>> >> >> echo "8086 2922" > /sys/bus/pci/drivers/vfio-pci/new_id
>> >> >> 3. run L2 with "-device vfio-pci,host=00:03.0"
>> >> >>
>> >> >> Any help is appreciated! Thanks very much!
>> >> >>
>> >> >> Regards,
>> >> >> Le
>> >> >
>> >> > Clearly, bus level reset can't work for the root bus :)
>> >>
>> >> Thanks very much!
>> >> I test the vfio with a second-bus ahci controller and it didn't
>> >> complain about the lack of reset mechanism. :) And the return val of
>> >> HOST_CTL is normal now (the same as emulated ahci controller).
>> >> However, it still paused when the BIOS tried to write to the HOST_CTL.
>> >> Do you have any idea?
>> >> And we should just test vfio and legacy pci-assignment with second bus
>> >> devices, not considering the root-bus devices?
>> >
>> > AHCI seems like a poor choice of device for this work, they typically
>> > don't support any kind of reset and they can be troublesome even for the
>> > L1 assignment.  You really want something with FLR support so that both
>> > the host and L1 guest can potentially reset the device.  That said, you
>> > may still run into bugs with a L1 guest directed FLR.  Thanks,
>> >
>> > Alex
>> >
>>
>> So what device do you think is suitable for the pci-assign test? e1000?
>> I just tested it with sound card ac97 and USB controller. But I don't
>> know how to attach them to a pcie-to-pci bridge, so maybe they weren't
>> reset before being assigned to L2.

[Qemu-devel] About the VT-d features that q35 chipset supports

2014-08-05 Thread Le Tan
Hi,
I am now adding features to the VT-d emulation for q35 chipset. I
think it is good to know what features q35 chipset supports exactly.
However I can't find materials about this. Does anyone know this? Or
it will be fine if someone can tell me the value of the Capability
Register and Extended Capability Register of the VT-d on q35.
Any help is appreciated!
Thanks very much!

Regards,
Le



Re: [Qemu-devel] About the VT-d features that q35 chipset supports

2014-08-05 Thread Le Tan
Hi Jan,

2014-08-06 1:08 GMT+08:00 Jan Kiszka :
> On 2014-08-05 14:55, Le Tan wrote:
>> Hi,
>> I am now adding features to the VT-d emulation for q35 chipset. I
>> think it is good to know what features q35 chipset supports exactly.
>> However I can't find materials about this. Does anyone know this? Or
>> it will be fine if someone can tell me the value of the Capability
>> Register and Extended Capability Register of the VT-d on q35.
>
> The kernel reports those registers during boot-up, so we would just need
> "dmesg | grep dmar" from a real 82Q35 box (Intel 3 Series).

Thanks! But my computer seems not to be Intel 3 Series. It seems to be
"7 Series/C210 Series Chipset Family". What's more, Acer seems to
disable and hide the VT-d support of it.

Le

> Jan
>
>> Any help is appreciated!
>> Thanks very much!
>>
>> Regards,
>> Le
>>
>
>



[Qemu-devel] [PATCH v3 0/5] intel-iommu: introduce Intel IOMMU (VT-d) emulation to q35 chipset

2014-08-11 Thread Le Tan
Hi,

These patches are intended to introduce Intel IOMMU (VT-d) emulation to q35
chipset. The major job in these patches is to add support for emulating Intel
IOMMU according to the VT-d specification, including basic responses to CSRs
accesses, the logics of DMAR (DMA remapping) and DMA memory address
translations.

Features implemented for now are:
1. Response to important CSRs accesses;
2. DMAR (DMA remapping) without PASID support;
3. Primary fault logging;
4. Support both register-based and queued invalidation for IOTLB and context
   cache invalidation;
5. Add DMAR table to ACPI tables to expose VT-d to BIOS;
6. Add "-machine iommu=on|off" option to enable/disable VT-d;
7. Only one DMAR unit for all the devices of PCI Segment 0.

Testing:
1. L1 guest with Linux with intel_iommu=on can interact with VT-d and boot
smoothly, and there exists information about VT-d in the log of kernel;
2. Run L1 with VT-d, L2 guest with Linux can boot smoothly withou PCI device
passthrough;
3. Run L1 with VT-d and "-soundhw ac97 (QEMU_AUDIO_DRV=alsa)", then assign the
sound card to L2; L2 can boot smoothly with legacy PCI assignment and I can
hear the music played in L2 from the host speakers;
4. Jailhouse hypervisor can run smoothly(tested by Jan).
5. Run L1 with VT-d and e1000 network card, then assign e1000 to L2; L2 will be
STUCK when booting. This still remains unsolved now. As far as I know, I suppose
that the L2 crashes when doing e1000_probe(). The QEMU of L1 will dump
something with "KVM: entry failed, hardware error 0x0", and the KVM of host
will print "nested_vmx_exit_handled failed vm entry 7". Unlike assigning the
sound card, after being assigned to L2, there is no translation entry of e1000
through VT-d, which I think means that e1000 doesn't issue any DMA access during
the boot of L2. Sometimes the kernel of L2 will print "divide error" during
booting. Maybe it results from the lack of reset mechanism.
6. VFIO is tested and is similar to legacy pci assignment.

Discussion:
1. There is one functionality called Zero-Length-Read (ZLR) which supports zero
length DMA read requests to write-only pages. If the VT-d emulation supports
ZLR, we need to know the exact length of one access. For now can QEMU express
zero-length requests?

TODO:
1. Context cache and IOTLB cache;
2. Fix the bug of legacy PCI assignment;

Changes since v2:
*address reviewing suggestions given by Jan
-add support for primary fault logging
-add support for queued invalidation

Changes since v1:
*address reviewing suggestions given by Michael, Paolo, Stefan and Jan
-split intel_iommu.h to include/hw/i386/intel_iommu.h and
 hw/i386/intel_iommu_internal.h
-change the copyright information
-change D() to VTD_DPRINTF()
-remove dead code
-rename constant definitions with consistent prefix VTD_
-rename some struct definitions according to QEMU standard
-rename some CSRs access functions
-use endian-save functions to access CSRs
-change machine option to "iommu=on|off"

Thanks very much!

Git trees:
https://github.com/tamlok/qemu

Le Tan (5):
  iommu: add is_write as a parameter to the translate function of
MemoryRegionIOMMUOps
  intel-iommu: introduce Intel IOMMU (VT-d) emulation
  intel-iommu: add DMAR table to ACPI tables
  intel-iommu: add Intel IOMMU emulation to q35 and add a machine option
"iommu" as a switch
  intel-iommu: add supports for queued invalidation interface

 exec.c |2 +-
 hw/alpha/typhoon.c |3 +-
 hw/core/machine.c  |   27 +-
 hw/i386/Makefile.objs  |1 +
 hw/i386/acpi-build.c   |   41 +
 hw/i386/acpi-defs.h|   70 ++
 hw/i386/intel_iommu.c  | 1722 
 hw/i386/intel_iommu_internal.h |  358 +
 hw/pci-host/apb.c  |3 +-
 hw/pci-host/q35.c  |   64 +-
 hw/ppc/spapr_iommu.c   |3 +-
 include/exec/memory.h  |2 +-
 include/hw/boards.h|1 +
 include/hw/i386/intel_iommu.h  |   90 +++
 include/hw/pci-host/q35.h  |2 +
 qemu-options.hx|5 +-
 vl.c   |4 +
 17 files changed, 2384 insertions(+), 14 deletions(-)
 create mode 100644 hw/i386/intel_iommu.c
 create mode 100644 hw/i386/intel_iommu_internal.h
 create mode 100644 include/hw/i386/intel_iommu.h

-- 
1.9.1




[Qemu-devel] [PATCH v3 5/5] intel-iommu: add supports for queued invalidation interface

2014-08-11 Thread Le Tan
Add supports for queued invalidation interface, an expended invalidation
interface with extended capabilities.

Signed-off-by: Le Tan 
---
 hw/i386/intel_iommu.c  | 381 -
 hw/i386/intel_iommu_internal.h |  15 +-
 2 files changed, 393 insertions(+), 3 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index b3a4f78..0e7d62d 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -332,6 +332,43 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, 
uint16_t source_id,
 }
 }
 
+/* Handle Invalidation Queue Errors of queued invalidation interface error
+ * conditions.
+ */
+static void vtd_handle_inv_queue_error(IntelIOMMUState *s)
+{
+uint32_t fsts_reg = get_long_raw(s, DMAR_FSTS_REG);
+
+set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_IQE);
+vtd_generate_fault_event(s, fsts_reg);
+}
+
+/* Set the IWC field and try to generate an invalidation completion interrupt 
*/
+static void vtd_generate_completion_event(IntelIOMMUState *s)
+{
+VTD_DPRINTF(INV, "completes an invalidation wait command with "
+"Interrupt Flag");
+if (get_long_raw(s, DMAR_ICS_REG) & VTD_ICS_IWC) {
+VTD_DPRINTF(INV, "there is a previous interrupt condition to be "
+"serviced by software, "
+"new invalidation event is not generated");
+return;
+}
+
+set_clear_mask_long(s, DMAR_ICS_REG, 0, VTD_ICS_IWC);
+set_clear_mask_long(s, DMAR_IECTL_REG, 0, VTD_IECTL_IP);
+if (get_long_raw(s, DMAR_IECTL_REG) & VTD_IECTL_IM) {
+VTD_DPRINTF(INV, "IM filed in IECTL_REG is set, new invalidation "
+"event is not generated");
+return;
+} else {
+/* Generate the interrupt event */
+vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
+set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
+}
+
+}
+
 static inline bool root_entry_present(VTDRootEntry *root)
 {
 return root->val & VTD_ROOT_ENTRY_P;
@@ -809,6 +846,54 @@ static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, 
uint64_t val)
 return iaig;
 }
 
+static inline bool queued_inv_enable_check(IntelIOMMUState *s)
+{
+return s->iq_tail == 0;
+}
+
+static inline bool queued_inv_disable_check(IntelIOMMUState *s)
+{
+return s->qi_enabled && (s->iq_tail == s->iq_head) &&
+   (s->iq_last_desc_type == VTD_INV_DESC_WAIT);
+}
+
+static void handle_gcmd_qie(IntelIOMMUState *s, bool en)
+{
+uint64_t iqa_val = get_quad_raw(s, DMAR_IQA_REG);
+VTD_DPRINTF(INV, "Queued Invalidation Enable %s", (en ? "on" : "off"));
+
+if (en) {
+if (queued_inv_enable_check(s)) {
+s->iq = iqa_val & VTD_IQA_IQA_MASK;
+/* 2^(x+8) entries */
+s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8);
+s->qi_enabled = true;
+VTD_DPRINTF(INV, "DMAR_IQA_REG 0x%"PRIx64, iqa_val);
+VTD_DPRINTF(INV, "Invalidation Queue addr 0x%"PRIx64 " size %d",
+s->iq, s->iq_size);
+/* Ok - report back to driver */
+set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES);
+} else {
+VTD_DPRINTF(GENERAL, "error: can't enable Queued Invalidation: "
+"tail %"PRIu16, s->iq_tail);
+}
+} else {
+if (queued_inv_disable_check(s)) {
+/* disable Queued Invalidation */
+set_quad_raw(s, DMAR_IQH_REG, 0);
+s->iq_head = 0;
+s->qi_enabled = false;
+/* Ok - report back to driver */
+set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_QIES, 0);
+} else {
+VTD_DPRINTF(GENERAL, "error: can't disable Queued Invalidation: "
+"head %"PRIu16 ", tail %"PRIu16
+", last_descriptor %"PRIu8,
+s->iq_head, s->iq_tail, s->iq_last_desc_type);
+}
+}
+}
+
 /* Set Root Table Pointer */
 static void handle_gcmd_srtp(IntelIOMMUState *s)
 {
@@ -854,6 +939,10 @@ static void handle_gcmd_write(IntelIOMMUState *s)
 /* Set/update the root-table pointer */
 handle_gcmd_srtp(s);
 }
+if (changed & VTD_GCMD_QIE) {
+/* Queued Invalidation Enable */
+handle_gcmd_qie(s, val & VTD_GCMD_QIE);
+}
 }
 
 /* Handle write to Context Command Register */
@@ -864,6 +953,11 @@ static void handle_ccmd_write(IntelIOMMUState *s)
 
 /* Context-cache invalidation request */
 if (val & VTD_CCMD_ICC) {
+if (s->qi_enabled) {
+VTD_DPRINTF(GENERAL, "error: Queued Invalidation enabled, "
+

[Qemu-devel] [PATCH v3 2/5] intel-iommu: introduce Intel IOMMU (VT-d) emulation

2014-08-11 Thread Le Tan
Add support for emulating Intel IOMMU according to the VT-d specification for
the q35 chipset machine. Implement the logics for DMAR (DMA remapping) without
PASID support. The emulation supports register-based invalidation and primary
fault logging.

Signed-off-by: Le Tan 
---
 hw/i386/Makefile.objs  |1 +
 hw/i386/intel_iommu.c  | 1345 
 hw/i386/intel_iommu_internal.h |  345 +++
 include/hw/i386/intel_iommu.h  |   90 +++
 4 files changed, 1781 insertions(+)
 create mode 100644 hw/i386/intel_iommu.c
 create mode 100644 hw/i386/intel_iommu_internal.h
 create mode 100644 include/hw/i386/intel_iommu.h

diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index 48014ab..6936111 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -2,6 +2,7 @@ obj-$(CONFIG_KVM) += kvm/
 obj-y += multiboot.o smbios.o
 obj-y += pc.o pc_piix.o pc_q35.o
 obj-y += pc_sysfw.o
+obj-y += intel_iommu.o
 obj-$(CONFIG_XEN) += ../xenpv/ xen/
 
 obj-y += kvmvapic.o
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
new file mode 100644
index 000..b3a4f78
--- /dev/null
+++ b/hw/i386/intel_iommu.c
@@ -0,0 +1,1345 @@
+/*
+ * QEMU emulation of an Intel IOMMU (VT-d)
+ *   (DMA Remapping device)
+ *
+ * Copyright (C) 2013 Knut Omang, Oracle 
+ * Copyright (C) 2014 Le Tan, 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/sysbus.h"
+#include "exec/address-spaces.h"
+#include "intel_iommu_internal.h"
+
+
+/*#define DEBUG_INTEL_IOMMU*/
+#ifdef DEBUG_INTEL_IOMMU
+enum {
+DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG,
+};
+#define VTD_DBGBIT(x)   (1 << DEBUG_##x)
+static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR) |
+  VTD_DBGBIT(FLOG);
+
+#define VTD_DPRINTF(what, fmt, ...) do { \
+if (vtd_dbgflags & VTD_DBGBIT(what)) { \
+fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \
+## __VA_ARGS__); } \
+} while (0)
+#else
+#define VTD_DPRINTF(what, fmt, ...) do {} while (0)
+#endif
+
+static inline void define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
+   uint64_t wmask, uint64_t w1cmask)
+{
+stq_le_p(&s->csr[addr], val);
+stq_le_p(&s->wmask[addr], wmask);
+stq_le_p(&s->w1cmask[addr], w1cmask);
+}
+
+static inline void define_quad_wo(IntelIOMMUState *s, hwaddr addr,
+  uint64_t mask)
+{
+stq_le_p(&s->womask[addr], mask);
+}
+
+static inline void define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val,
+   uint32_t wmask, uint32_t w1cmask)
+{
+stl_le_p(&s->csr[addr], val);
+stl_le_p(&s->wmask[addr], wmask);
+stl_le_p(&s->w1cmask[addr], w1cmask);
+}
+
+static inline void define_long_wo(IntelIOMMUState *s, hwaddr addr,
+  uint32_t mask)
+{
+stl_le_p(&s->womask[addr], mask);
+}
+
+/* "External" get/set operations */
+static inline void set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val)
+{
+uint64_t oldval = ldq_le_p(&s->csr[addr]);
+uint64_t wmask = ldq_le_p(&s->wmask[addr]);
+uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
+stq_le_p(&s->csr[addr],
+ ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
+}
+
+static inline void set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val)
+{
+uint32_t oldval = ldl_le_p(&s->csr[addr]);
+uint32_t wmask = ldl_le_p(&s->wmask[addr]);
+uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
+stl_le_p(&s->csr[addr],
+ ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
+}
+
+static inline uint64_t get_quad(IntelIOMMUState *s, hwaddr addr)
+{
+uint64_t val = ldq_le_p(&s->csr[addr]);
+uint64_t womask = ldq_le_p(&s->womask[addr]);
+return val & ~womask;
+}
+
+
+static inline uint32_t get_long(IntelIOMMUState *s, hwaddr addr)
+{
+uint32_t val = ldl_le_p(&s->csr[addr]);
+uint32_t womask = ldl_le_p(&s->womask[addr]);
+return val & ~womask;
+}
+
+/* "Internal" get/set operations */
+static inline uint64_t get_quad_raw(Intel

[Qemu-devel] [PATCH v3 3/5] intel-iommu: add DMAR table to ACPI tables

2014-08-11 Thread Le Tan
Expose Intel IOMMU to the BIOS. If object of TYPE_INTEL_IOMMU_DEVICE exists,
add DMAR table to ACPI RSDT table. For now the DMAR table indicates that there
is only one hardware unit without INTR_REMAP capability on the platform.

Signed-off-by: Le Tan 
---
 hw/i386/acpi-build.c | 41 ++
 hw/i386/acpi-defs.h  | 70 
 2 files changed, 111 insertions(+)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 816c6d9..595f501 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -47,6 +47,7 @@
 #include "hw/i386/ich9.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/pci-host/q35.h"
+#include "hw/i386/intel_iommu.h"
 
 #include "hw/i386/q35-acpi-dsdt.hex"
 #include "hw/i386/acpi-dsdt.hex"
@@ -1350,6 +1351,31 @@ build_mcfg_q35(GArray *table_data, GArray *linker, 
AcpiMcfgInfo *info)
 }
 
 static void
+build_dmar_q35(GArray *table_data, GArray *linker)
+{
+int dmar_start = table_data->len;
+
+AcpiTableDmar *dmar;
+AcpiDmarHardwareUnit *drhd;
+
+dmar = acpi_data_push(table_data, sizeof(*dmar));
+dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1;
+dmar->flags = 0;/* No intr_remap for now */
+
+/* DMAR Remapping Hardware Unit Definition structure */
+drhd = acpi_data_push(table_data, sizeof(*drhd));
+drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT);
+drhd->length = cpu_to_le16(sizeof(*drhd));   /* No device scope now */
+drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL;
+drhd->pci_segment = cpu_to_le16(0);
+drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR);
+
+build_header(linker, table_data, (void *)(table_data->data + dmar_start),
+ "DMAR", table_data->len - dmar_start, 1);
+}
+
+
+static void
 build_dsdt(GArray *table_data, GArray *linker, AcpiMiscInfo *misc)
 {
 AcpiTableHeader *dsdt;
@@ -1470,6 +1496,17 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg)
 return true;
 }
 
+static bool acpi_has_iommu(void)
+{
+bool ambiguous;
+Object *intel_iommu;
+
+intel_iommu = object_resolve_path_type("", TYPE_INTEL_IOMMU_DEVICE,
+   &ambiguous);
+return intel_iommu && !ambiguous;
+}
+
+
 static
 void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables)
 {
@@ -1539,6 +1576,10 @@ void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables 
*tables)
 acpi_add_table(table_offsets, tables->table_data);
 build_mcfg_q35(tables->table_data, tables->linker, &mcfg);
 }
+if (acpi_has_iommu()) {
+acpi_add_table(table_offsets, tables->table_data);
+build_dmar_q35(tables->table_data, tables->linker);
+}
 
 /* Add tables supplied by user (if any) */
 for (u = acpi_table_first(); u; u = acpi_table_next(u)) {
diff --git a/hw/i386/acpi-defs.h b/hw/i386/acpi-defs.h
index e93babb..9674825 100644
--- a/hw/i386/acpi-defs.h
+++ b/hw/i386/acpi-defs.h
@@ -314,4 +314,74 @@ struct AcpiTableMcfg {
 } QEMU_PACKED;
 typedef struct AcpiTableMcfg AcpiTableMcfg;
 
+/* DMAR - DMA Remapping table r2.2 */
+struct AcpiTableDmar {
+ACPI_TABLE_HEADER_DEF
+uint8_t host_address_width; /* Maximum DMA physical addressability */
+uint8_t flags;
+uint8_t reserved[10];
+} QEMU_PACKED;
+typedef struct AcpiTableDmar AcpiTableDmar;
+
+/* Masks for Flags field above */
+#define ACPI_DMAR_INTR_REMAP(1)
+#define ACPI_DMAR_X2APIC_OPT_OUT(2)
+
+/*
+ * DMAR sub-structures (Follow DMA Remapping table)
+ */
+#define ACPI_DMAR_SUB_HEADER_DEF /* Common ACPI DMAR sub-structure header */\
+uint16_t type;  \
+uint16_t length;
+
+/* Values for sub-structure type for DMAR */
+enum {
+ACPI_DMAR_TYPE_HARDWARE_UNIT = 0,   /* DRHD */
+ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, /* RMRR */
+ACPI_DMAR_TYPE_ATSR = 2,/* ATSR */
+ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3,   /* RHSR */
+ACPI_DMAR_TYPE_ANDD = 4,/* ANDD */
+ACPI_DMAR_TYPE_RESERVED = 5 /* Reserved for furture use */
+};
+
+/*
+ * Sub-structures for DMAR, correspond to Type in ACPI_DMAR_SUB_HEADER_DEF
+ */
+
+/* DMAR Device Scope structures */
+struct AcpiDmarDeviceScope {
+uint8_t type;
+uint8_t length;
+uint16_t reserved;
+uint8_t enumeration_id;
+uint8_t start_bus_number;
+uint8_t path[0];
+} QEMU_PACKED;
+typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope;
+
+/* Values for type in struct AcpiDmarDeviceScope */
+enum {
+ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0,
+ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1,
+ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2,
+ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3,
+ACPI_DMAR_SCOPE_TYPE_HPET = 4,
+ACPI_DMAR_SCOPE_TYPE_ACPI = 5,
+ACPI_DMAR_SCOPE_TYPE_RESERVED = 6 /* Reserved for future use */
+};
+
+/* 0: Hardware Unit Definition */
+struct AcpiDmarHardwareUnit {
+ACPI_DMAR_SUB_HEADER_DEF
+uin

[Qemu-devel] [PATCH v3 1/5] iommu: add is_write as a parameter to the translate function of MemoryRegionIOMMUOps

2014-08-11 Thread Le Tan
Add a bool variable is_write as a parameter to the translate function of
MemoryRegionIOMMUOps to indicate the operation of the access. It can be
used for correct fault reporting from within the callback.
Change the interface of related functions.

Signed-off-by: Le Tan 
---
 exec.c| 2 +-
 hw/alpha/typhoon.c| 3 ++-
 hw/pci-host/apb.c | 3 ++-
 hw/ppc/spapr_iommu.c  | 3 ++-
 include/exec/memory.h | 2 +-
 5 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/exec.c b/exec.c
index 765bd94..5ccc106 100644
--- a/exec.c
+++ b/exec.c
@@ -373,7 +373,7 @@ MemoryRegion *address_space_translate(AddressSpace *as, 
hwaddr addr,
 break;
 }
 
-iotlb = mr->iommu_ops->translate(mr, addr);
+iotlb = mr->iommu_ops->translate(mr, addr, is_write);
 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 | (addr & iotlb.addr_mask));
 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c
index 67a1070..31947d9 100644
--- a/hw/alpha/typhoon.c
+++ b/hw/alpha/typhoon.c
@@ -660,7 +660,8 @@ static bool window_translate(TyphoonWindow *win, hwaddr 
addr,
 /* Handle PCI-to-system address translation.  */
 /* TODO: A translation failure here ought to set PCI error codes on the
Pchip and generate a machine check interrupt.  */
-static IOMMUTLBEntry typhoon_translate_iommu(MemoryRegion *iommu, hwaddr addr)
+static IOMMUTLBEntry typhoon_translate_iommu(MemoryRegion *iommu, hwaddr addr,
+ bool is_write)
 {
 TyphoonPchip *pchip = container_of(iommu, TyphoonPchip, iommu);
 IOMMUTLBEntry ret;
diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c
index d238a84..0e0e0ee 100644
--- a/hw/pci-host/apb.c
+++ b/hw/pci-host/apb.c
@@ -203,7 +203,8 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void 
*opaque, int devfn)
 return &is->iommu_as;
 }
 
-static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr)
+static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr,
+ bool is_write)
 {
 IOMMUState *is = container_of(iommu, IOMMUState, iommu);
 hwaddr baseaddr, offset;
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index f6e32a4..6c91d8e 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -59,7 +59,8 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
 return NULL;
 }
 
-static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr 
addr)
+static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr 
addr,
+   bool is_write)
 {
 sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
 uint64_t tce;
diff --git a/include/exec/memory.h b/include/exec/memory.h
index e2c8e3e..834543d 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -129,7 +129,7 @@ typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps;
 
 struct MemoryRegionIOMMUOps {
 /* Return a TLB entry that contains a given address. */
-IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr);
+IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool 
is_write);
 };
 
 typedef struct CoalescedMemoryRange CoalescedMemoryRange;
-- 
1.9.1




[Qemu-devel] [PATCH v3 4/5] intel-iommu: add Intel IOMMU emulation to q35 and add a machine option "iommu" as a switch

2014-08-11 Thread Le Tan
Add Intel IOMMU emulation to q35 chipset and expose it to the guest.
1. Add a machine option. Users can use "-machine iommu=on|off" in the command
line to enable/disable Intel IOMMU. The default is off.
2. Accroding to the machine option, q35 will initialize the Intel IOMMU and
use pci_setup_iommu() to setup q35_host_dma_iommu() as the IOMMU function for
the pci bus.
3. q35_host_dma_iommu() will return different address space according to the
bus_num and devfn of the device.

Signed-off-by: Le Tan 
---
 hw/core/machine.c | 27 +---
 hw/pci-host/q35.c | 64 +++
 include/hw/boards.h   |  1 +
 include/hw/pci-host/q35.h |  2 ++
 qemu-options.hx   |  5 +++-
 vl.c  |  4 +++
 6 files changed, 94 insertions(+), 9 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 7a66c57..f0046d6 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -235,6 +235,20 @@ static void machine_set_firmware(Object *obj, const char 
*value, Error **errp)
 ms->firmware = g_strdup(value);
 }
 
+static bool machine_get_iommu(Object *obj, Error **errp)
+{
+MachineState *ms = MACHINE(obj);
+
+return ms->iommu;
+}
+
+static void machine_set_iommu(Object *obj, bool value, Error **errp)
+{
+MachineState *ms = MACHINE(obj);
+
+ms->iommu = value;
+}
+
 static void machine_initfn(Object *obj)
 {
 object_property_add_str(obj, "accel",
@@ -270,10 +284,17 @@ static void machine_initfn(Object *obj)
  machine_set_dump_guest_core,
  NULL);
 object_property_add_bool(obj, "mem-merge",
- machine_get_mem_merge, machine_set_mem_merge, 
NULL);
-object_property_add_bool(obj, "usb", machine_get_usb, machine_set_usb, 
NULL);
+ machine_get_mem_merge,
+ machine_set_mem_merge, NULL);
+object_property_add_bool(obj, "usb",
+ machine_get_usb,
+ machine_set_usb, NULL);
 object_property_add_str(obj, "firmware",
-machine_get_firmware, machine_set_firmware, NULL);
+machine_get_firmware,
+machine_set_firmware, NULL);
+object_property_add_bool(obj, "iommu",
+ machine_get_iommu,
+ machine_set_iommu, NULL);
 }
 
 static void machine_finalize(Object *obj)
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index a0a3068..3342711 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -347,6 +347,53 @@ static void mch_reset(DeviceState *qdev)
 mch_update(mch);
 }
 
+static AddressSpace *q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+IntelIOMMUState *s = opaque;
+VTDAddressSpace **pvtd_as;
+VTDAddressSpace *vtd_as;
+int bus_num = pci_bus_num(bus);
+
+assert(devfn >= 0);
+
+pvtd_as = s->address_spaces[bus_num];
+if (!pvtd_as) {
+/* No corresponding free() */
+pvtd_as = g_malloc0(sizeof(VTDAddressSpace *) *
+VTD_PCI_SLOT_MAX * VTD_PCI_FUNC_MAX);
+s->address_spaces[bus_num] = pvtd_as;
+}
+
+vtd_as = *(pvtd_as + devfn);
+if (!vtd_as) {
+vtd_as = g_malloc0(sizeof(*vtd_as));
+*(pvtd_as + devfn) = vtd_as;
+
+vtd_as->bus_num = bus_num;
+vtd_as->devfn = devfn;
+vtd_as->iommu_state = s;
+memory_region_init_iommu(&vtd_as->iommu, OBJECT(s), &s->iommu_ops,
+ "intel_iommu", UINT64_MAX);
+address_space_init(&vtd_as->as, &vtd_as->iommu, "intel_iommu");
+}
+
+return &vtd_as->as;
+}
+
+static void mch_init_dmar(MCHPCIState *mch)
+{
+PCIBus *pci_bus = PCI_BUS(qdev_get_parent_bus(DEVICE(mch)));
+
+mch->iommu = INTEL_IOMMU_DEVICE(qdev_create(NULL, 
TYPE_INTEL_IOMMU_DEVICE));
+object_property_add_child(OBJECT(mch), "intel-iommu",
+  OBJECT(mch->iommu), NULL);
+qdev_init_nofail(DEVICE(mch->iommu));
+sysbus_mmio_map(SYS_BUS_DEVICE(mch->iommu), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
+
+pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu);
+}
+
+
 static int mch_init(PCIDevice *d)
 {
 int i;
@@ -363,13 +410,20 @@ static int mch_init(PCIDevice *d)
 memory_region_add_subregion_overlap(mch->system_memory, 0xa,
 &mch->smram_region, 1);
 memory_region_set_enabled(&mch->smram_region, false);
-init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory, 
mch->pci_address_space,
- &mch->pam_regions[0], PAM_BIOS_BASE, PAM_BIOS_SIZE);
+init_pam(DEVICE(mch), mch->ram_mem

Re: [Qemu-devel] [PATCH v3 2/5] intel-iommu: introduce Intel IOMMU (VT-d) emulation

2014-08-12 Thread Le Tan
Hi Jan,

2014-08-12 15:34 GMT+08:00 Jan Kiszka :
> On 2014-08-11 09:04, Le Tan wrote:
>> Add support for emulating Intel IOMMU according to the VT-d specification for
>> the q35 chipset machine. Implement the logics for DMAR (DMA remapping) 
>> without
>> PASID support. The emulation supports register-based invalidation and primary
>> fault logging.
>
> Some arbitrary comments below (means, I didn't read every line and
> likely missed some things). In general, this looks and works pretty good!
>
>>
>> Signed-off-by: Le Tan 
>> ---
>>  hw/i386/Makefile.objs  |1 +
>>  hw/i386/intel_iommu.c  | 1345 
>> 
>>  hw/i386/intel_iommu_internal.h |  345 +++
>>  include/hw/i386/intel_iommu.h  |   90 +++
>>  4 files changed, 1781 insertions(+)
>>  create mode 100644 hw/i386/intel_iommu.c
>>  create mode 100644 hw/i386/intel_iommu_internal.h
>>  create mode 100644 include/hw/i386/intel_iommu.h
>>
>> diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
>> index 48014ab..6936111 100644
>> --- a/hw/i386/Makefile.objs
>> +++ b/hw/i386/Makefile.objs
>> @@ -2,6 +2,7 @@ obj-$(CONFIG_KVM) += kvm/
>>  obj-y += multiboot.o smbios.o
>>  obj-y += pc.o pc_piix.o pc_q35.o
>>  obj-y += pc_sysfw.o
>> +obj-y += intel_iommu.o
>>  obj-$(CONFIG_XEN) += ../xenpv/ xen/
>>
>>  obj-y += kvmvapic.o
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> new file mode 100644
>> index 000..b3a4f78
>> --- /dev/null
>> +++ b/hw/i386/intel_iommu.c
>> @@ -0,0 +1,1345 @@
>> +/*
>> + * QEMU emulation of an Intel IOMMU (VT-d)
>> + *   (DMA Remapping device)
>> + *
>> + * Copyright (C) 2013 Knut Omang, Oracle 
>> + * Copyright (C) 2014 Le Tan, 
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> +
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> +
>> + * You should have received a copy of the GNU General Public License along
>> + * with this program; if not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +#include "hw/sysbus.h"
>> +#include "exec/address-spaces.h"
>> +#include "intel_iommu_internal.h"
>> +
>> +
>> +/*#define DEBUG_INTEL_IOMMU*/
>> +#ifdef DEBUG_INTEL_IOMMU
>> +enum {
>> +DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG,
>> +};
>> +#define VTD_DBGBIT(x)   (1 << DEBUG_##x)
>> +static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR) |
>> +  VTD_DBGBIT(FLOG);
>> +
>> +#define VTD_DPRINTF(what, fmt, ...) do { \
>> +if (vtd_dbgflags & VTD_DBGBIT(what)) { \
>> +fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \
>> +## __VA_ARGS__); } \
>> +} while (0)
>> +#else
>> +#define VTD_DPRINTF(what, fmt, ...) do {} while (0)
>> +#endif
>> +
>> +static inline void define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t 
>> val,
>> +   uint64_t wmask, uint64_t w1cmask)
>
> In general, don't declare functions inline needlessly. It makes sense
> for trivial ones you export via a header, but even a 2- or 3-liner like
> this can be more efficient as stand-alone function. Anything bigger
> definitely does not deserve that tag.
>
> Inline is just a hint to the compiler anyway, so you can perfectly leave
> it out for any almost-trivial static function.

OK, I will delete most of them except those just have one "return" line.

>> +{
>> +stq_le_p(&s->csr[addr], val);
>> +stq_le_p(&s->wmask[addr], wmask);
>> +stq_le_p(&s->w1cmask[addr], w1cmask);
>> +}
>> +
>> +static inline void define_quad_wo(IntelIOMMUState *s, hwaddr addr,
>> +  uint64_t mask)
>> +{
>> +stq_le_p(&s->womask[addr], mask);
>> +}
>> +
>> +static inline void define_long(IntelIOMMUState *s, hwaddr addr, uint32_t 
>> val,
>> +   uint32_t wmask, uint32_t w1cmask)
>>

Re: [Qemu-devel] [PATCH v3 4/5] intel-iommu: add Intel IOMMU emulation to q35 and add a machine option "iommu" as a switch

2014-08-14 Thread Le Tan
2014-08-14 19:12 GMT+08:00 Michael S. Tsirkin :
> On Mon, Aug 11, 2014 at 03:05:01PM +0800, Le Tan wrote:
>> Add Intel IOMMU emulation to q35 chipset and expose it to the guest.
>> 1. Add a machine option. Users can use "-machine iommu=on|off" in the command
>> line to enable/disable Intel IOMMU. The default is off.
>> 2. Accroding to the machine option, q35 will initialize the Intel IOMMU and
>> use pci_setup_iommu() to setup q35_host_dma_iommu() as the IOMMU function for
>> the pci bus.
>> 3. q35_host_dma_iommu() will return different address space according to the
>> bus_num and devfn of the device.
>>
>> Signed-off-by: Le Tan 
>> ---
>>  hw/core/machine.c | 27 +---
>>  hw/pci-host/q35.c | 64 
>> +++
>>  include/hw/boards.h   |  1 +
>>  include/hw/pci-host/q35.h |  2 ++
>>  qemu-options.hx   |  5 +++-
>>  vl.c  |  4 +++
>>  6 files changed, 94 insertions(+), 9 deletions(-)
>>
>> diff --git a/hw/core/machine.c b/hw/core/machine.c
>> index 7a66c57..f0046d6 100644
>> --- a/hw/core/machine.c
>> +++ b/hw/core/machine.c
>> @@ -235,6 +235,20 @@ static void machine_set_firmware(Object *obj, const 
>> char *value, Error **errp)
>>  ms->firmware = g_strdup(value);
>>  }
>>
>> +static bool machine_get_iommu(Object *obj, Error **errp)
>> +{
>> +MachineState *ms = MACHINE(obj);
>> +
>> +return ms->iommu;
>> +}
>> +
>> +static void machine_set_iommu(Object *obj, bool value, Error **errp)
>> +{
>> +MachineState *ms = MACHINE(obj);
>> +
>> +ms->iommu = value;
>> +}
>> +
>>  static void machine_initfn(Object *obj)
>>  {
>>  object_property_add_str(obj, "accel",
>> @@ -270,10 +284,17 @@ static void machine_initfn(Object *obj)
>>   machine_set_dump_guest_core,
>>   NULL);
>>  object_property_add_bool(obj, "mem-merge",
>> - machine_get_mem_merge, machine_set_mem_merge, 
>> NULL);
>> -object_property_add_bool(obj, "usb", machine_get_usb, machine_set_usb, 
>> NULL);
>> + machine_get_mem_merge,
>> + machine_set_mem_merge, NULL);
>> +object_property_add_bool(obj, "usb",
>> + machine_get_usb,
>> + machine_set_usb, NULL);
>>  object_property_add_str(obj, "firmware",
>> -machine_get_firmware, machine_set_firmware, 
>> NULL);
>> +machine_get_firmware,
>> +machine_set_firmware, NULL);
>> +object_property_add_bool(obj, "iommu",
>> + machine_get_iommu,
>> + machine_set_iommu, NULL);
>>  }
>>
>>  static void machine_finalize(Object *obj)
>> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
>> index a0a3068..3342711 100644
>> --- a/hw/pci-host/q35.c
>> +++ b/hw/pci-host/q35.c
>> @@ -347,6 +347,53 @@ static void mch_reset(DeviceState *qdev)
>>  mch_update(mch);
>>  }
>>
>> +static AddressSpace *q35_host_dma_iommu(PCIBus *bus, void *opaque, int 
>> devfn)
>> +{
>> +IntelIOMMUState *s = opaque;
>> +VTDAddressSpace **pvtd_as;
>> +VTDAddressSpace *vtd_as;
>> +int bus_num = pci_bus_num(bus);
>> +
>> +assert(devfn >= 0);
>> +
>> +pvtd_as = s->address_spaces[bus_num];
>> +if (!pvtd_as) {
>> +/* No corresponding free() */
>> +pvtd_as = g_malloc0(sizeof(VTDAddressSpace *) *
>> +VTD_PCI_SLOT_MAX * VTD_PCI_FUNC_MAX);
>> +s->address_spaces[bus_num] = pvtd_as;
>> +}
>> +
>> +vtd_as = *(pvtd_as + devfn);
>
> pvtd_as[devfn] is cleaner.
> In fact, you can then drop vtd_as local variable, use pvtd_as[devfn]
> directly.
>
>> +if (!vtd_as) {
>> +vtd_as = g_malloc0(sizeof(*vtd_as));
>> +*(pvtd_as + devfn) = vtd_as;
>> +
>> +vtd_as->bus_num = bus_num;
>> +vtd_as->devfn = devfn;
>> +vtd_as->iommu_state = s;
>> +memory_region_init_iommu(&vtd_as->iommu, OBJECT(s), &s->iommu_ops,
>> + "intel_iommu", UINT64_MAX);
>> +address_space_

Re: [Qemu-devel] [PATCH v3 3/5] intel-iommu: add DMAR table to ACPI tables

2014-08-14 Thread Le Tan
Hi Michael,

2014-08-14 19:06 GMT+08:00 Michael S. Tsirkin :
> On Mon, Aug 11, 2014 at 03:05:00PM +0800, Le Tan wrote:
>> Expose Intel IOMMU to the BIOS. If object of TYPE_INTEL_IOMMU_DEVICE exists,
>> add DMAR table to ACPI RSDT table. For now the DMAR table indicates that 
>> there
>> is only one hardware unit without INTR_REMAP capability on the platform.
>>
>> Signed-off-by: Le Tan 
>
> Could you add a unit test please?
>
>> ---
>>  hw/i386/acpi-build.c | 41 ++
>>  hw/i386/acpi-defs.h  | 70 
>> 
>>  2 files changed, 111 insertions(+)
>>
>> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
>> index 816c6d9..595f501 100644
>> --- a/hw/i386/acpi-build.c
>> +++ b/hw/i386/acpi-build.c
>> @@ -47,6 +47,7 @@
>>  #include "hw/i386/ich9.h"
>>  #include "hw/pci/pci_bus.h"
>>  #include "hw/pci-host/q35.h"
>> +#include "hw/i386/intel_iommu.h"
>>
>>  #include "hw/i386/q35-acpi-dsdt.hex"
>>  #include "hw/i386/acpi-dsdt.hex"
>> @@ -1350,6 +1351,31 @@ build_mcfg_q35(GArray *table_data, GArray *linker, 
>> AcpiMcfgInfo *info)
>>  }
>>
>>  static void
>> +build_dmar_q35(GArray *table_data, GArray *linker)
>> +{
>> +int dmar_start = table_data->len;
>> +
>> +AcpiTableDmar *dmar;
>> +AcpiDmarHardwareUnit *drhd;
>> +
>> +dmar = acpi_data_push(table_data, sizeof(*dmar));
>> +dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1;
>> +dmar->flags = 0;/* No intr_remap for now */
>> +
>> +/* DMAR Remapping Hardware Unit Definition structure */
>> +drhd = acpi_data_push(table_data, sizeof(*drhd));
>> +drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT);
>> +drhd->length = cpu_to_le16(sizeof(*drhd));   /* No device scope now */
>> +drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL;
>> +drhd->pci_segment = cpu_to_le16(0);
>> +drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR);
>> +
>> +build_header(linker, table_data, (void *)(table_data->data + 
>> dmar_start),
>> + "DMAR", table_data->len - dmar_start, 1);
>> +}
>> +
>> +
>> +static void
>>  build_dsdt(GArray *table_data, GArray *linker, AcpiMiscInfo *misc)
>>  {
>>  AcpiTableHeader *dsdt;
>> @@ -1470,6 +1496,17 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg)
>>  return true;
>>  }
>>
>> +static bool acpi_has_iommu(void)
>> +{
>> +bool ambiguous;
>> +Object *intel_iommu;
>> +
>> +intel_iommu = object_resolve_path_type("", TYPE_INTEL_IOMMU_DEVICE,
>> +   &ambiguous);
>> +return intel_iommu && !ambiguous;
>> +}
>> +
>> +
>>  static
>>  void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables)
>>  {
>> @@ -1539,6 +1576,10 @@ void acpi_build(PcGuestInfo *guest_info, 
>> AcpiBuildTables *tables)
>>  acpi_add_table(table_offsets, tables->table_data);
>>  build_mcfg_q35(tables->table_data, tables->linker, &mcfg);
>>  }
>> +if (acpi_has_iommu()) {
>> +acpi_add_table(table_offsets, tables->table_data);
>> +build_dmar_q35(tables->table_data, tables->linker);
>> +}
>>
>>  /* Add tables supplied by user (if any) */
>>  for (u = acpi_table_first(); u; u = acpi_table_next(u)) {
>> diff --git a/hw/i386/acpi-defs.h b/hw/i386/acpi-defs.h
>> index e93babb..9674825 100644
>> --- a/hw/i386/acpi-defs.h
>> +++ b/hw/i386/acpi-defs.h
>> @@ -314,4 +314,74 @@ struct AcpiTableMcfg {
>>  } QEMU_PACKED;
>>  typedef struct AcpiTableMcfg AcpiTableMcfg;
>>
>> +/* DMAR - DMA Remapping table r2.2 */
>> +struct AcpiTableDmar {
>> +ACPI_TABLE_HEADER_DEF
>> +uint8_t host_address_width; /* Maximum DMA physical addressability */
>> +uint8_t flags;
>> +uint8_t reserved[10];
>> +} QEMU_PACKED;
>> +typedef struct AcpiTableDmar AcpiTableDmar;
>> +
>> +/* Masks for Flags field above */
>> +#define ACPI_DMAR_INTR_REMAP(1)
>> +#define ACPI_DMAR_X2APIC_OPT_OUT(2)
>> +
>> +/*
>> + * DMAR sub-structures (Follow DMA Remapping table)
>> + */
>> +#define ACPI_DMAR_SUB_HEADER_DEF /* Common ACPI DMAR sub-structure header 
>> */\
>> +uint16_t type;  \
>> +uint16_t length;
>
> R

Re: [Qemu-devel] [PATCH v3 4/5] intel-iommu: add Intel IOMMU emulation to q35 and add a machine option "iommu" as a switch

2014-08-14 Thread Le Tan
2014-08-14 19:35 GMT+08:00 Michael S. Tsirkin :
> On Thu, Aug 14, 2014 at 07:33:29PM +0800, Le Tan wrote:
>> 2014-08-14 19:12 GMT+08:00 Michael S. Tsirkin :
>> > On Mon, Aug 11, 2014 at 03:05:01PM +0800, Le Tan wrote:
>> >> Add Intel IOMMU emulation to q35 chipset and expose it to the guest.
>> >> 1. Add a machine option. Users can use "-machine iommu=on|off" in the 
>> >> command
>> >> line to enable/disable Intel IOMMU. The default is off.
>> >> 2. Accroding to the machine option, q35 will initialize the Intel IOMMU 
>> >> and
>> >> use pci_setup_iommu() to setup q35_host_dma_iommu() as the IOMMU function 
>> >> for
>> >> the pci bus.
>> >> 3. q35_host_dma_iommu() will return different address space according to 
>> >> the
>> >> bus_num and devfn of the device.
>> >>
>> >> Signed-off-by: Le Tan 
>> >> ---
>> >>  hw/core/machine.c | 27 +---
>> >>  hw/pci-host/q35.c | 64 
>> >> +++
>> >>  include/hw/boards.h   |  1 +
>> >>  include/hw/pci-host/q35.h |  2 ++
>> >>  qemu-options.hx   |  5 +++-
>> >>  vl.c  |  4 +++
>> >>  6 files changed, 94 insertions(+), 9 deletions(-)
>> >>
>> >> diff --git a/hw/core/machine.c b/hw/core/machine.c
>> >> index 7a66c57..f0046d6 100644
>> >> --- a/hw/core/machine.c
>> >> +++ b/hw/core/machine.c
>> >> @@ -235,6 +235,20 @@ static void machine_set_firmware(Object *obj, const 
>> >> char *value, Error **errp)
>> >>  ms->firmware = g_strdup(value);
>> >>  }
>> >>
>> >> +static bool machine_get_iommu(Object *obj, Error **errp)
>> >> +{
>> >> +MachineState *ms = MACHINE(obj);
>> >> +
>> >> +return ms->iommu;
>> >> +}
>> >> +
>> >> +static void machine_set_iommu(Object *obj, bool value, Error **errp)
>> >> +{
>> >> +MachineState *ms = MACHINE(obj);
>> >> +
>> >> +ms->iommu = value;
>> >> +}
>> >> +
>> >>  static void machine_initfn(Object *obj)
>> >>  {
>> >>  object_property_add_str(obj, "accel",
>> >> @@ -270,10 +284,17 @@ static void machine_initfn(Object *obj)
>> >>   machine_set_dump_guest_core,
>> >>   NULL);
>> >>  object_property_add_bool(obj, "mem-merge",
>> >> - machine_get_mem_merge, 
>> >> machine_set_mem_merge, NULL);
>> >> -object_property_add_bool(obj, "usb", machine_get_usb, 
>> >> machine_set_usb, NULL);
>> >> + machine_get_mem_merge,
>> >> + machine_set_mem_merge, NULL);
>> >> +object_property_add_bool(obj, "usb",
>> >> + machine_get_usb,
>> >> + machine_set_usb, NULL);
>> >>  object_property_add_str(obj, "firmware",
>> >> -machine_get_firmware, machine_set_firmware, 
>> >> NULL);
>> >> +machine_get_firmware,
>> >> +machine_set_firmware, NULL);
>> >> +object_property_add_bool(obj, "iommu",
>> >> + machine_get_iommu,
>> >> + machine_set_iommu, NULL);
>> >>  }
>> >>
>> >>  static void machine_finalize(Object *obj)
>> >> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
>> >> index a0a3068..3342711 100644
>> >> --- a/hw/pci-host/q35.c
>> >> +++ b/hw/pci-host/q35.c
>> >> @@ -347,6 +347,53 @@ static void mch_reset(DeviceState *qdev)
>> >>  mch_update(mch);
>> >>  }
>> >>
>> >> +static AddressSpace *q35_host_dma_iommu(PCIBus *bus, void *opaque, int 
>> >> devfn)
>> >> +{
>> >> +IntelIOMMUState *s = opaque;
>> >> +VTDAddressSpace **pvtd_as;
>> >> +VTDAddressSpace *vtd_as;
>> >> +int bus_num = pci_bus_num(bus);
>> >> +
>> >> +assert(devfn >= 0);
>> >> +
>> >> +pvtd_as = s-

Re: [Qemu-devel] [PATCH v3 3/5] intel-iommu: add DMAR table to ACPI tables

2014-08-14 Thread Le Tan
2014-08-14 19:51 GMT+08:00 Jan Kiszka :
> On 2014-08-14 13:43, Michael S. Tsirkin wrote:
>> On Thu, Aug 14, 2014 at 01:36:49PM +0200, Jan Kiszka wrote:
>>> On 2014-08-14 13:06, Michael S. Tsirkin wrote:
>>>> On Mon, Aug 11, 2014 at 03:05:00PM +0800, Le Tan wrote:
>>>>> Expose Intel IOMMU to the BIOS. If object of TYPE_INTEL_IOMMU_DEVICE 
>>>>> exists,
>>>>> add DMAR table to ACPI RSDT table. For now the DMAR table indicates that 
>>>>> there
>>>>> is only one hardware unit without INTR_REMAP capability on the platform.
>>>>>
>>>>> Signed-off-by: Le Tan 
>>>>
>>>> Could you add a unit test please?
>>>
>>> While unit tests would really be helpful, I'm afraid that's not in reach
>>> (GSoC is almost over). The good news is that we have pretty broad test
>>> coverage with both Linux and also Jailhouse already.
>>>
>>> Do you see unit tests as precondition for merging the series?
>>>
>>> Jan
>>>
>>
>> Not a pre-requisite - it's very easy to add a unit test,
>> just add a case in test_acpi_tcg. So I can do it myself afterwards.
>>
>
> Ah, ok, that's tests/bios-tables-test.c. Maybe Le can have a look if
> time is left. But hard pecils-down is already on the 18th.
>
> Jan
>
>

OK, I will attempt to add that. I have just pushed the IOTLB cache. I
am going to fix patches according to these reviews now.

Le



Re: [Qemu-devel] [PATCH v3 0/5] intel-iommu: introduce Intel IOMMU (VT-d) emulation to q35 chipset

2014-08-15 Thread Le Tan
Hi Knut,

2014-08-15 19:15 GMT+08:00 Knut Omang :
> On Fri, 2014-08-15 at 06:42 +0200, Knut Omang wrote:
>> On Thu, 2014-08-14 at 14:10 +0200, Jan Kiszka wrote:
>> > On 2014-08-14 13:15, Michael S. Tsirkin wrote:
>> > > On Mon, Aug 11, 2014 at 03:04:57PM +0800, Le Tan wrote:
>> > >> Hi,
>> > >>
>> > >> These patches are intended to introduce Intel IOMMU (VT-d) emulation to 
>> > >> q35
>> > >> chipset. The major job in these patches is to add support for emulating 
>> > >> Intel
>> > >> IOMMU according to the VT-d specification, including basic responses to 
>> > >> CSRs
>> > >> accesses, the logics of DMAR (DMA remapping) and DMA memory address
>> > >> translations.
>> > >
>> > > Thanks!
>> > > Looks very good overall, I noted some coding style issues - I didn't
>> > > bother reporting each issue in every place where it appears - reported
>> > > each issue once only, so please find and fix all instances of each
>> > > issue.
>> >
>> > BTW, because I was in urgent need for virtual test environment for
>> > Jailhouse, I hacked interrupt remapping on top of Le's patches:
>> >
>> > http://git.kiszka.org/?p=qemu.git;a=shortlog;h=refs/heads/queues/vtd-intremap
>> >
>> > The approach likely needs further discussions and refinements but it
>> > already allows me to work on top with our hypervisor, and also Linux.
>> > You can see from the last commit that Le's work made it pretty easy to
>> > build this on top.
>>
>> Le,
>>
>> I have tried Jan's branch with my device setup which consists of a
>> minimal q35 setup, an ioh3420 root port (specified as -device
>> ioh3420,slot=0 ) and a pcie device plugged into that root port, which
>> gives the following lscpi -t:
>>
>> -[:00]-+-00.0
>>+-01.0
>>+-02.0
>>+-03.0-[01]00.0
>>+-04.0
>>+-1f.0
>>+-1f.2
>>\-1f.3
>>
>> All seems to work beautifully (I see the ISA bridge happily receive
>> translations) until the first DMA from my device model (at 1:00.0)
>> arrives, at which point I get:
>>
>> [ 1663.732413] dmar: DMAR:[DMA Write] Request device [00:03.0] fault addr 
>> fffa
>> [ 1663.732413] DMAR:[fault reason 02] Present bit in context entry is clear
>>
>> I would have expected request device 01:00.0 for this.
>> It is not clear to me yet if this is a weakness of the implementation of
>> ioh3420 or the iommu. Just wanted to let you know right away in case you
>> can shed some light to it or it is an easy fix,
>>
>> The device uses pci_dma_rw with itself as device pointer.
>
> To verify my hypothesis: with this rude hack my device now works much
> better:
>
> @@ -774,6 +780,8 @@ static void iommu_translate(VTDAddressSpace *vtd_as,
> int bus_num, int devfn,
>  is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
>  } else {
>  ret_fr = dev_to_context_entry(s, bus_num, devfn, &ce);
> +if (ret_fr)
> +ret_fr = dev_to_context_entry(s, 1, 0, &ce);
>  is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
>  if (ret_fr) {
>  ret_fr = -ret_fr;
>
> Looking at how things look on hardware, multiple devices often receive
> overlapping DMA address ranges for different physical addresses.
>
> So if I understand the way this works, every requester ID would also
> need to have it's own unique VTDAddressSpace, as each pci
> device/function sees a unique DMA address space..

ioh3420 is a pcie-to-pcie bridge, right? In my opinion, each pci-e
device behind the pcie-to-pcie bridge can be assigned individually.
For now I added the VT-d to q35 by just adding it to the root pci bus.
You can see here in q35.c:
pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu);
So if we add a pcie-to-pcie bridge, we may have to call the
pci_setup_iommu() for that new bus. I don't know where to hook into
this now. :) If you know the mechanism behind that, you can try to add
that for the new bus. (I will dive into this after the clean up.)
What do you think?
Thanks very much for your testing! :)

Le


> Knut
>



[Qemu-devel] [PATCH v4 2/8] intel-iommu: introduce Intel IOMMU (VT-d) emulation

2014-08-15 Thread Le Tan
Add support for emulating Intel IOMMU according to the VT-d specification for
the q35 chipset machine. Implement the logics for DMAR (DMA remapping) without
PASID support. The emulation supports register-based invalidation and primary
fault logging.

Signed-off-by: Le Tan 
---
 hw/i386/Makefile.objs  |1 +
 hw/i386/intel_iommu.c  | 1257 
 hw/i386/intel_iommu_internal.h |  333 +++
 include/hw/i386/intel_iommu.h  |   89 +++
 4 files changed, 1680 insertions(+)
 create mode 100644 hw/i386/intel_iommu.c
 create mode 100644 hw/i386/intel_iommu_internal.h
 create mode 100644 include/hw/i386/intel_iommu.h

diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index 48014ab..6936111 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -2,6 +2,7 @@ obj-$(CONFIG_KVM) += kvm/
 obj-y += multiboot.o smbios.o
 obj-y += pc.o pc_piix.o pc_q35.o
 obj-y += pc_sysfw.o
+obj-y += intel_iommu.o
 obj-$(CONFIG_XEN) += ../xenpv/ xen/
 
 obj-y += kvmvapic.o
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
new file mode 100644
index 000..8e67e04
--- /dev/null
+++ b/hw/i386/intel_iommu.c
@@ -0,0 +1,1257 @@
+/*
+ * QEMU emulation of an Intel IOMMU (VT-d)
+ *   (DMA Remapping device)
+ *
+ * Copyright (C) 2013 Knut Omang, Oracle 
+ * Copyright (C) 2014 Le Tan, 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/sysbus.h"
+#include "exec/address-spaces.h"
+#include "intel_iommu_internal.h"
+
+/*#define DEBUG_INTEL_IOMMU*/
+#ifdef DEBUG_INTEL_IOMMU
+enum {
+DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG,
+};
+#define VTD_DBGBIT(x)   (1 << DEBUG_##x)
+static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
+
+#define VTD_DPRINTF(what, fmt, ...) do { \
+if (vtd_dbgflags & VTD_DBGBIT(what)) { \
+fprintf(stderr, "(vtd)%s: " fmt "\n", __func__, \
+## __VA_ARGS__); } \
+} while (0)
+#else
+#define VTD_DPRINTF(what, fmt, ...) do {} while (0)
+#endif
+
+static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
+uint64_t wmask, uint64_t w1cmask)
+{
+stq_le_p(&s->csr[addr], val);
+stq_le_p(&s->wmask[addr], wmask);
+stq_le_p(&s->w1cmask[addr], w1cmask);
+}
+
+static void vtd_define_quad_wo(IntelIOMMUState *s, hwaddr addr, uint64_t mask)
+{
+stq_le_p(&s->womask[addr], mask);
+}
+
+static void vtd_define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val,
+uint32_t wmask, uint32_t w1cmask)
+{
+stl_le_p(&s->csr[addr], val);
+stl_le_p(&s->wmask[addr], wmask);
+stl_le_p(&s->w1cmask[addr], w1cmask);
+}
+
+static void vtd_define_long_wo(IntelIOMMUState *s, hwaddr addr, uint32_t mask)
+{
+stl_le_p(&s->womask[addr], mask);
+}
+
+/* "External" get/set operations */
+static void vtd_set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val)
+{
+uint64_t oldval = ldq_le_p(&s->csr[addr]);
+uint64_t wmask = ldq_le_p(&s->wmask[addr]);
+uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
+stq_le_p(&s->csr[addr],
+ ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
+}
+
+static void vtd_set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val)
+{
+uint32_t oldval = ldl_le_p(&s->csr[addr]);
+uint32_t wmask = ldl_le_p(&s->wmask[addr]);
+uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
+stl_le_p(&s->csr[addr],
+ ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
+}
+
+static uint64_t vtd_get_quad(IntelIOMMUState *s, hwaddr addr)
+{
+uint64_t val = ldq_le_p(&s->csr[addr]);
+uint64_t womask = ldq_le_p(&s->womask[addr]);
+return val & ~womask;
+}
+
+static uint32_t vtd_get_long(IntelIOMMUState *s, hwaddr addr)
+{
+uint32_t val = ldl_le_p(&s->csr[addr]);
+uint32_t womask = ldl_le_p(&s->womask[addr]);
+return val & ~womask;
+}
+
+/* "Internal" get/set operations */
+static uint64_t vtd_get_quad_raw(IntelIOMMUState *s, hwaddr addr)
+{
+return ldq_le_p(&s->csr[addr]);
+}
+
+static uint32_t vtd_get_long_r

[Qemu-devel] [PATCH v4 0/8] intel-iommu: introduce Intel IOMMU (VT-d) emulation to q35 chipset

2014-08-15 Thread Le Tan
Hi,

These patches are intended to introduce Intel IOMMU (VT-d) emulation to q35
chipset. The major job in these patches is to add support for emulating Intel
IOMMU according to the VT-d specification, including basic responses to CSRs
accesses, the logics of DMAR (DMA remapping) and DMA memory address
translations.

Features implemented for now are:
1. Response to important CSRs accesses;
2. DMAR (DMA remapping) without PASID support;
3. Primary fault logging;
4. Support both register-based and queued invalidation for IOTLB and context
   cache invalidation;
5. Add DMAR table to ACPI tables to expose VT-d to BIOS;
6. Add "-machine iommu=on|off" option to enable/disable VT-d;
7. Only one DMAR unit for all the devices of PCI Segment 0;
8. Context-cache and IOTLB.

Testing:
1. L1 guest with Linux with intel_iommu=on can interact with VT-d and boot
smoothly, and there exists information about VT-d in the log of kernel;
2. Run L1 with VT-d, L2 guest with Linux can boot smoothly withou PCI device
passthrough;
3. Run L1 with VT-d and "-soundhw ac97 (QEMU_AUDIO_DRV=alsa)", then assign the
sound card to L2; L2 can boot smoothly with legacy PCI assignment and I can
hear the music played in L2 from the host speakers;
4. Jailhouse hypervisor can run smoothly (tested by Jan).
5. Run L1 with VT-d and e1000 network card, then assign e1000 to L2; L2 will be
STUCK when booting. This still remains unsolved now. As far as I know, I suppose
that the L2 crashes when doing e1000_probe(). The QEMU of L1 will dump
something with "KVM: entry failed, hardware error 0x0", and the KVM of host
will print "nested_vmx_exit_handled failed vm entry 7". Unlike assigning the
sound card, after being assigned to L2, there is no translation entry of e1000
through VT-d, which I think means that e1000 doesn't issue any DMA access during
the boot of L2. Sometimes the kernel of L2 will print "divide error" during
booting. Maybe it results from the lack of reset mechanism.
6. VFIO is tested and is similar to legacy pci assignment.

TODO:
1. Fix the bug of legacy PCI assignment;
2. Add unit test for DMAR ACPI table;
3. Add support for PCIE-to-PCIE bridge.

Changes since v3:
*address reviewing suggestions given by Jan and Michael
-implement Context-cache and IOTLB
-remove 'inline' keyword from most functions
-rename all the functions with prefix vtd_
-clean up constant definitions

Changes since v2:
*address reviewing suggestions given by Jan
-add support for primary fault logging
-add support for queued invalidation

Changes since v1:
*address reviewing suggestions given by Michael, Paolo, Stefan and Jan
-split intel_iommu.h to include/hw/i386/intel_iommu.h and
 hw/i386/intel_iommu_internal.h
-change the copyright information
-change D() to VTD_DPRINTF()
-remove dead code
-rename constant definitions with consistent prefix VTD_
-rename some struct definitions according to QEMU standard
-rename some CSRs access functions
-use endian-save functions to access CSRs
-change machine option to "iommu=on|off"

Thanks very much!

Git trees:
https://github.com/tamlok/qemu

Le Tan (8):
  iommu: add is_write as a parameter to the translate function of
MemoryRegionIOMMUOps
  intel-iommu: introduce Intel IOMMU (VT-d) emulation
  intel-iommu: add DMAR table to ACPI tables
  intel-iommu: add Intel IOMMU emulation to q35 and add a machine option
"iommu" as a switch
  intel-iommu: fix coding style issues around in q35.c and machine.c
  intel-iommu: add supports for queued invalidation interface
  intel-iommu: add context-cache to cache context-entry
  intel-iommu: add IOTLB using hash table

 exec.c |2 +-
 hw/alpha/typhoon.c |3 +-
 hw/core/machine.c  |   27 +-
 hw/i386/Makefile.objs  |1 +
 hw/i386/acpi-build.c   |   39 +
 hw/i386/acpi-defs.h|   40 +
 hw/i386/intel_iommu.c  | 1963 
 hw/i386/intel_iommu_internal.h |  389 
 hw/pci-host/apb.c  |3 +-
 hw/pci-host/q35.c  |   58 +-
 hw/ppc/spapr_iommu.c   |3 +-
 include/exec/memory.h  |2 +-
 include/hw/boards.h|1 +
 include/hw/i386/intel_iommu.h  |  120 +++
 include/hw/pci-host/q35.h  |2 +
 qemu-options.hx|5 +-
 vl.c   |4 +
 17 files changed, 2648 insertions(+), 14 deletions(-)
 create mode 100644 hw/i386/intel_iommu.c
 create mode 100644 hw/i386/intel_iommu_internal.h
 create mode 100644 include/hw/i386/intel_iommu.h

-- 
1.9.1




[Qemu-devel] [PATCH v4 1/8] iommu: add is_write as a parameter to the translate function of MemoryRegionIOMMUOps

2014-08-15 Thread Le Tan
Add a bool variable is_write as a parameter to the translate function of
MemoryRegionIOMMUOps to indicate the operation of the access. It can be
used for correct fault reporting from within the callback.
Change the interface of related functions.

Signed-off-by: Le Tan 
---
 exec.c| 2 +-
 hw/alpha/typhoon.c| 3 ++-
 hw/pci-host/apb.c | 3 ++-
 hw/ppc/spapr_iommu.c  | 3 ++-
 include/exec/memory.h | 2 +-
 5 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/exec.c b/exec.c
index 765bd94..5ccc106 100644
--- a/exec.c
+++ b/exec.c
@@ -373,7 +373,7 @@ MemoryRegion *address_space_translate(AddressSpace *as, 
hwaddr addr,
 break;
 }
 
-iotlb = mr->iommu_ops->translate(mr, addr);
+iotlb = mr->iommu_ops->translate(mr, addr, is_write);
 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
 | (addr & iotlb.addr_mask));
 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c
index 67a1070..31947d9 100644
--- a/hw/alpha/typhoon.c
+++ b/hw/alpha/typhoon.c
@@ -660,7 +660,8 @@ static bool window_translate(TyphoonWindow *win, hwaddr 
addr,
 /* Handle PCI-to-system address translation.  */
 /* TODO: A translation failure here ought to set PCI error codes on the
Pchip and generate a machine check interrupt.  */
-static IOMMUTLBEntry typhoon_translate_iommu(MemoryRegion *iommu, hwaddr addr)
+static IOMMUTLBEntry typhoon_translate_iommu(MemoryRegion *iommu, hwaddr addr,
+ bool is_write)
 {
 TyphoonPchip *pchip = container_of(iommu, TyphoonPchip, iommu);
 IOMMUTLBEntry ret;
diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c
index d238a84..0e0e0ee 100644
--- a/hw/pci-host/apb.c
+++ b/hw/pci-host/apb.c
@@ -203,7 +203,8 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void 
*opaque, int devfn)
 return &is->iommu_as;
 }
 
-static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr)
+static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr,
+ bool is_write)
 {
 IOMMUState *is = container_of(iommu, IOMMUState, iommu);
 hwaddr baseaddr, offset;
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index f6e32a4..6c91d8e 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -59,7 +59,8 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
 return NULL;
 }
 
-static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr 
addr)
+static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr 
addr,
+   bool is_write)
 {
 sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
 uint64_t tce;
diff --git a/include/exec/memory.h b/include/exec/memory.h
index e2c8e3e..834543d 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -129,7 +129,7 @@ typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps;
 
 struct MemoryRegionIOMMUOps {
 /* Return a TLB entry that contains a given address. */
-IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr);
+IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool 
is_write);
 };
 
 typedef struct CoalescedMemoryRange CoalescedMemoryRange;
-- 
1.9.1




[Qemu-devel] [PATCH v4 4/8] intel-iommu: add Intel IOMMU emulation to q35 and add a machine option "iommu" as a switch

2014-08-15 Thread Le Tan
Add Intel IOMMU emulation to q35 chipset and expose it to the guest.
1. Add a machine option. Users can use "-machine iommu=on|off" in the command
line to enable/disable Intel IOMMU. The default is off.
2. Accroding to the machine option, q35 will initialize the Intel IOMMU and
use pci_setup_iommu() to setup q35_host_dma_iommu() as the IOMMU function for
the pci bus.
3. q35_host_dma_iommu() will return different address space according to the
bus_num and devfn of the device.

Signed-off-by: Le Tan 
---
 hw/core/machine.c | 17 +
 hw/pci-host/q35.c | 46 ++
 include/hw/boards.h   |  1 +
 include/hw/pci-host/q35.h |  2 ++
 qemu-options.hx   |  5 -
 vl.c  |  4 
 6 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 7a66c57..0708de5 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -235,6 +235,20 @@ static void machine_set_firmware(Object *obj, const char 
*value, Error **errp)
 ms->firmware = g_strdup(value);
 }
 
+static bool machine_get_iommu(Object *obj, Error **errp)
+{
+MachineState *ms = MACHINE(obj);
+
+return ms->iommu;
+}
+
+static void machine_set_iommu(Object *obj, bool value, Error **errp)
+{
+MachineState *ms = MACHINE(obj);
+
+ms->iommu = value;
+}
+
 static void machine_initfn(Object *obj)
 {
 object_property_add_str(obj, "accel",
@@ -274,6 +288,9 @@ static void machine_initfn(Object *obj)
 object_property_add_bool(obj, "usb", machine_get_usb, machine_set_usb, 
NULL);
 object_property_add_str(obj, "firmware",
 machine_get_firmware, machine_set_firmware, NULL);
+object_property_add_bool(obj, "iommu",
+ machine_get_iommu,
+ machine_set_iommu, NULL);
 }
 
 static void machine_finalize(Object *obj)
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index a0a3068..ad8f1b9 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -347,6 +347,48 @@ static void mch_reset(DeviceState *qdev)
 mch_update(mch);
 }
 
+static AddressSpace *q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+IntelIOMMUState *s = opaque;
+VTDAddressSpace **pvtd_as;
+int bus_num = pci_bus_num(bus);
+
+assert(0 <= bus_num && bus_num <= VTD_PCI_BUS_MAX);
+assert(0 <= devfn && devfn <= VTD_PCI_DEVFN_MAX);
+
+pvtd_as = s->address_spaces[bus_num];
+if (!pvtd_as) {
+/* No corresponding free() */
+pvtd_as = g_malloc0(sizeof(VTDAddressSpace *) * VTD_PCI_DEVFN_MAX);
+s->address_spaces[bus_num] = pvtd_as;
+}
+if (!pvtd_as[devfn]) {
+pvtd_as[devfn] = g_malloc0(sizeof(VTDAddressSpace));
+
+pvtd_as[devfn]->bus_num = (uint8_t)bus_num;
+pvtd_as[devfn]->devfn = (uint8_t)devfn;
+pvtd_as[devfn]->iommu_state = s;
+memory_region_init_iommu(&pvtd_as[devfn]->iommu, OBJECT(s),
+ &s->iommu_ops, "intel_iommu", UINT64_MAX);
+address_space_init(&pvtd_as[devfn]->as,
+   &pvtd_as[devfn]->iommu, "intel_iommu");
+}
+return &pvtd_as[devfn]->as;
+}
+
+static void mch_init_dmar(MCHPCIState *mch)
+{
+PCIBus *pci_bus = PCI_BUS(qdev_get_parent_bus(DEVICE(mch)));
+
+mch->iommu = INTEL_IOMMU_DEVICE(qdev_create(NULL, 
TYPE_INTEL_IOMMU_DEVICE));
+object_property_add_child(OBJECT(mch), "intel-iommu",
+  OBJECT(mch->iommu), NULL);
+qdev_init_nofail(DEVICE(mch->iommu));
+sysbus_mmio_map(SYS_BUS_DEVICE(mch->iommu), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
+
+pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu);
+}
+
 static int mch_init(PCIDevice *d)
 {
 int i;
@@ -370,6 +412,10 @@ static int mch_init(PCIDevice *d)
  &mch->pam_regions[i+1], PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE,
  PAM_EXPAN_SIZE);
 }
+/* Intel IOMMU (VT-d) */
+if (qemu_opt_get_bool(qemu_get_machine_opts(), "iommu", false)) {
+mch_init_dmar(mch);
+}
 return 0;
 }
 
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 605a970..dfb6718 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -123,6 +123,7 @@ struct MachineState {
 bool mem_merge;
 bool usb;
 char *firmware;
+bool iommu;
 
 ram_addr_t ram_size;
 ram_addr_t maxram_size;
diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h
index d9ee978..025d6e6 100644
--- a/include/hw/pci-host/q35.h
+++ b/include/hw/pci-host/q35.h
@@ -33,6 +33,7 @@
 #include "hw/acpi/acpi.h"
 #include "hw/acpi/ich9.h"
 #include "hw/pci-host/pam.h"
+#include "hw/i386/intel_iommu.h"
 
 #define TYPE_Q35_HOST_DEVICE 

[Qemu-devel] [PATCH v4 3/8] intel-iommu: add DMAR table to ACPI tables

2014-08-15 Thread Le Tan
Expose Intel IOMMU to the BIOS. If object of TYPE_INTEL_IOMMU_DEVICE exists,
add DMAR table to ACPI RSDT table. For now the DMAR table indicates that there
is only one hardware unit without INTR_REMAP capability on the platform.

Signed-off-by: Le Tan 
---
 hw/i386/acpi-build.c | 39 +++
 hw/i386/acpi-defs.h  | 40 
 2 files changed, 79 insertions(+)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 816c6d9..ac56a63 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -47,6 +47,7 @@
 #include "hw/i386/ich9.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/pci-host/q35.h"
+#include "hw/i386/intel_iommu.h"
 
 #include "hw/i386/q35-acpi-dsdt.hex"
 #include "hw/i386/acpi-dsdt.hex"
@@ -1350,6 +1351,30 @@ build_mcfg_q35(GArray *table_data, GArray *linker, 
AcpiMcfgInfo *info)
 }
 
 static void
+build_dmar_q35(GArray *table_data, GArray *linker)
+{
+int dmar_start = table_data->len;
+
+AcpiTableDmar *dmar;
+AcpiDmarHardwareUnit *drhd;
+
+dmar = acpi_data_push(table_data, sizeof(*dmar));
+dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1;
+dmar->flags = 0;/* No intr_remap for now */
+
+/* DMAR Remapping Hardware Unit Definition structure */
+drhd = acpi_data_push(table_data, sizeof(*drhd));
+drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT);
+drhd->length = cpu_to_le16(sizeof(*drhd));   /* No device scope now */
+drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL;
+drhd->pci_segment = cpu_to_le16(0);
+drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR);
+
+build_header(linker, table_data, (void *)(table_data->data + dmar_start),
+ "DMAR", table_data->len - dmar_start, 1);
+}
+
+static void
 build_dsdt(GArray *table_data, GArray *linker, AcpiMiscInfo *misc)
 {
 AcpiTableHeader *dsdt;
@@ -1470,6 +1495,16 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg)
 return true;
 }
 
+static bool acpi_has_iommu(void)
+{
+bool ambiguous;
+Object *intel_iommu;
+
+intel_iommu = object_resolve_path_type("", TYPE_INTEL_IOMMU_DEVICE,
+   &ambiguous);
+return intel_iommu && !ambiguous;
+}
+
 static
 void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables *tables)
 {
@@ -1539,6 +1574,10 @@ void acpi_build(PcGuestInfo *guest_info, AcpiBuildTables 
*tables)
 acpi_add_table(table_offsets, tables->table_data);
 build_mcfg_q35(tables->table_data, tables->linker, &mcfg);
 }
+if (acpi_has_iommu()) {
+acpi_add_table(table_offsets, tables->table_data);
+build_dmar_q35(tables->table_data, tables->linker);
+}
 
 /* Add tables supplied by user (if any) */
 for (u = acpi_table_first(); u; u = acpi_table_next(u)) {
diff --git a/hw/i386/acpi-defs.h b/hw/i386/acpi-defs.h
index e93babb..93f424b 100644
--- a/hw/i386/acpi-defs.h
+++ b/hw/i386/acpi-defs.h
@@ -314,4 +314,44 @@ struct AcpiTableMcfg {
 } QEMU_PACKED;
 typedef struct AcpiTableMcfg AcpiTableMcfg;
 
+/* DMAR - DMA Remapping table r2.2 */
+struct AcpiTableDmar {
+ACPI_TABLE_HEADER_DEF
+uint8_t host_address_width; /* Maximum DMA physical addressability */
+uint8_t flags;
+uint8_t reserved[10];
+} QEMU_PACKED;
+typedef struct AcpiTableDmar AcpiTableDmar;
+
+/* Masks for Flags field above */
+#define ACPI_DMAR_INTR_REMAP1
+#define ACPI_DMAR_X2APIC_OPT_OUT(1 << 1)
+
+/* Values for sub-structure type for DMAR */
+enum {
+ACPI_DMAR_TYPE_HARDWARE_UNIT = 0,   /* DRHD */
+ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, /* RMRR */
+ACPI_DMAR_TYPE_ATSR = 2,/* ATSR */
+ACPI_DMAR_TYPE_HARDWARE_AFFINITY = 3,   /* RHSR */
+ACPI_DMAR_TYPE_ANDD = 4,/* ANDD */
+ACPI_DMAR_TYPE_RESERVED = 5 /* Reserved for furture use */
+};
+
+/*
+ * Sub-structures for DMAR
+ */
+/* Type 0: Hardware Unit Definition */
+struct AcpiDmarHardwareUnit {
+uint16_t type;
+uint16_t length;
+uint8_t flags;
+uint8_t reserved;
+uint16_t pci_segment;   /* The PCI Segment associated with this unit */
+uint64_t address;   /* Base address of remapping hardware register-set */
+} QEMU_PACKED;
+typedef struct AcpiDmarHardwareUnit AcpiDmarHardwareUnit;
+
+/* Masks for Flags field above */
+#define ACPI_DMAR_INCLUDE_PCI_ALL   1
+
 #endif
-- 
1.9.1




[Qemu-devel] [PATCH v4 8/8] intel-iommu: add IOTLB using hash table

2014-08-15 Thread Le Tan
Add IOTLB to cache information about the translation of input-addresses. IOTLB
use a GHashTable as cache. The key of the hash table is the logical-OR of gfn
and source id after left-shifting.

Signed-off-by: Le Tan 
---
 hw/i386/intel_iommu.c  | 213 -
 hw/i386/intel_iommu_internal.h |  34 ++-
 include/hw/i386/intel_iommu.h  |  11 ++-
 3 files changed, 251 insertions(+), 7 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index c514310..0a4282a 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -132,6 +132,35 @@ static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState 
*s, hwaddr addr,
 return new_val;
 }
 
+/* GHashTable functions */
+static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
+{
+return *((const uint64_t *)v1) == *((const uint64_t *)v2);
+}
+
+static guint vtd_uint64_hash(gconstpointer v)
+{
+return (guint)*(const uint64_t *)v;
+}
+
+static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
+  gpointer user_data)
+{
+VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
+uint16_t domain_id = *(uint16_t *)user_data;
+return entry->domain_id == domain_id;
+}
+
+static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
+gpointer user_data)
+{
+VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
+VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
+uint64_t gfn = info->gfn & info->mask;
+return (entry->domain_id == info->domain_id) &&
+((entry->gfn & info->mask) == gfn);
+}
+
 /* Reset all the gen of VTDAddressSpace to zero and set the gen of
  * IntelIOMMUState to 1.
  */
@@ -159,6 +188,48 @@ static void vtd_reset_context_cache(IntelIOMMUState *s)
 s->context_cache_gen = 1;
 }
 
+static void vtd_reset_iotlb(IntelIOMMUState *s)
+{
+assert(s->iotlb);
+g_hash_table_remove_all(s->iotlb);
+}
+
+static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
+   hwaddr addr)
+{
+uint64_t key;
+
+key = (addr >> VTD_PAGE_SHIFT_4K) |
+   ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT);
+return g_hash_table_lookup(s->iotlb, &key);
+
+}
+
+static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
+ uint16_t domain_id, hwaddr addr, uint64_t slpte,
+ bool read_flags, bool write_flags)
+{
+VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
+uint64_t *key = g_malloc(sizeof(*key));
+uint64_t gfn = addr >> VTD_PAGE_SHIFT_4K;
+
+VTD_DPRINTF(CACHE, "update iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64
+" slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr, slpte,
+domain_id);
+if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
+VTD_DPRINTF(CACHE, "iotlb exceeds size limit, forced to reset");
+vtd_reset_iotlb(s);
+}
+
+entry->gfn = gfn;
+entry->domain_id = domain_id;
+entry->slpte = slpte;
+entry->read_flags = read_flags;
+entry->write_flags = write_flags;
+*key = gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT);
+g_hash_table_replace(s->iotlb, key, entry);
+}
+
 /* Given the reg addr of both the message data and address, generate an
  * interrupt via MSI.
  */
@@ -693,6 +764,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, 
uint8_t bus_num,
 bool is_fpd_set = false;
 bool reads = true;
 bool writes = true;
+VTDIOTLBEntry *iotlb_entry;
 
 /* Check if the request is in interrupt address range */
 if (vtd_is_interrupt_addr(addr)) {
@@ -716,6 +788,17 @@ static void vtd_do_iommu_translate(VTDAddressSpace 
*vtd_as, uint8_t bus_num,
 return;
 }
 }
+/* Try to fetch slpte form IOTLB */
+iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
+if (iotlb_entry) {
+VTD_DPRINTF(CACHE, "hit iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64
+" slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr,
+iotlb_entry->slpte, iotlb_entry->domain_id);
+slpte = iotlb_entry->slpte;
+reads = iotlb_entry->read_flags;
+writes = iotlb_entry->write_flags;
+goto out;
+}
 /* Try to fetch context-entry from cache first */
 if (cc_entry->context_cache_gen == s->context_cache_gen) {
 VTD_DPRINTF(CACHE, "hit context-cache bus %d devfn %d "
@@ -760,6 +843,9 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, 
uint8_t bus_num,
 return;
 }
 
+vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
+

[Qemu-devel] [PATCH v4 5/8] intel-iommu: fix coding style issues around in q35.c and machine.c

2014-08-15 Thread Le Tan
Fix coding style issues around in hw/pci-host/q35.c and hw/core/machine.c.

Signed-off-by: Le Tan 
---
 hw/core/machine.c | 10 +++---
 hw/pci-host/q35.c | 11 ++-
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 0708de5..f0046d6 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -284,10 +284,14 @@ static void machine_initfn(Object *obj)
  machine_set_dump_guest_core,
  NULL);
 object_property_add_bool(obj, "mem-merge",
- machine_get_mem_merge, machine_set_mem_merge, 
NULL);
-object_property_add_bool(obj, "usb", machine_get_usb, machine_set_usb, 
NULL);
+ machine_get_mem_merge,
+ machine_set_mem_merge, NULL);
+object_property_add_bool(obj, "usb",
+ machine_get_usb,
+ machine_set_usb, NULL);
 object_property_add_str(obj, "firmware",
-machine_get_firmware, machine_set_firmware, NULL);
+machine_get_firmware,
+machine_set_firmware, NULL);
 object_property_add_bool(obj, "iommu",
  machine_get_iommu,
  machine_set_iommu, NULL);
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index ad8f1b9..5618ad2 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -405,12 +405,13 @@ static int mch_init(PCIDevice *d)
 memory_region_add_subregion_overlap(mch->system_memory, 0xa,
 &mch->smram_region, 1);
 memory_region_set_enabled(&mch->smram_region, false);
-init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory, 
mch->pci_address_space,
- &mch->pam_regions[0], PAM_BIOS_BASE, PAM_BIOS_SIZE);
+init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory,
+ mch->pci_address_space, &mch->pam_regions[0],
+ PAM_BIOS_BASE, PAM_BIOS_SIZE);
 for (i = 0; i < 12; ++i) {
-init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory, 
mch->pci_address_space,
- &mch->pam_regions[i+1], PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE,
- PAM_EXPAN_SIZE);
+init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory,
+ mch->pci_address_space, &mch->pam_regions[i+1],
+ PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
 }
 /* Intel IOMMU (VT-d) */
 if (qemu_opt_get_bool(qemu_get_machine_opts(), "iommu", false)) {
-- 
1.9.1




[Qemu-devel] [PATCH v4 7/8] intel-iommu: add context-cache to cache context-entry

2014-08-15 Thread Le Tan
Add context-cache to cache context-entry encountered on a page-walk. Each
VTDAddressSpace has a member of VTDContextCacheEntry which represents an entry
in the context-cache. Since devices with different bus_num and devfn have their
respective VTDAddressSpace, this will be a good way to reference the cached
entries.
Each VTDContextCacheEntry will have a context_cache_gen and the cached entry
is valid only when context_cache_gen equals IntelIOMMUState.context_cache_gen.

Signed-off-by: Le Tan 
---
 hw/i386/intel_iommu.c  | 188 +++--
 hw/i386/intel_iommu_internal.h |  23 +++--
 hw/pci-host/q35.c  |   1 +
 include/hw/i386/intel_iommu.h  |  22 +
 4 files changed, 199 insertions(+), 35 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 60dec4f..c514310 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -27,6 +27,7 @@
 #ifdef DEBUG_INTEL_IOMMU
 enum {
 DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG,
+DEBUG_CACHE,
 };
 #define VTD_DBGBIT(x)   (1 << DEBUG_##x)
 static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
@@ -131,6 +132,33 @@ static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState 
*s, hwaddr addr,
 return new_val;
 }
 
+/* Reset all the gen of VTDAddressSpace to zero and set the gen of
+ * IntelIOMMUState to 1.
+ */
+static void vtd_reset_context_cache(IntelIOMMUState *s)
+{
+VTDAddressSpace **pvtd_as;
+VTDAddressSpace *vtd_as;
+uint32_t bus_it;
+uint32_t devfn_it;
+
+VTD_DPRINTF(CACHE, "global context_cache_gen=1");
+for (bus_it = 0; bus_it < VTD_PCI_BUS_MAX; ++bus_it) {
+pvtd_as = s->address_spaces[bus_it];
+if (!pvtd_as) {
+continue;
+}
+for (devfn_it = 0; devfn_it < VTD_PCI_DEVFN_MAX; ++devfn_it) {
+vtd_as = pvtd_as[devfn_it];
+if (!vtd_as) {
+continue;
+}
+vtd_as->context_cache_entry.context_cache_gen = 0;
+}
+}
+s->context_cache_gen = 1;
+}
+
 /* Given the reg addr of both the message data and address, generate an
  * interrupt via MSI.
  */
@@ -651,11 +679,13 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
  * @is_write: The access is a write operation
  * @entry: IOMMUTLBEntry that contain the addr to be translated and result
  */
-static void vtd_do_iommu_translate(IntelIOMMUState *s, uint8_t bus_num,
+static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, uint8_t bus_num,
uint8_t devfn, hwaddr addr, bool is_write,
IOMMUTLBEntry *entry)
 {
+IntelIOMMUState *s = vtd_as->iommu_state;
 VTDContextEntry ce;
+VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry;
 uint64_t slpte;
 uint32_t level;
 uint16_t source_id = vtd_make_source_id(bus_num, devfn);
@@ -686,18 +716,35 @@ static void vtd_do_iommu_translate(IntelIOMMUState *s, 
uint8_t bus_num,
 return;
 }
 }
-
-ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
-is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
-if (ret_fr) {
-ret_fr = -ret_fr;
-if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
-VTD_DPRINTF(FLOG, "fault processing is disabled for DMA requests "
-"through this context-entry (with FPD Set)");
-} else {
-vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
+/* Try to fetch context-entry from cache first */
+if (cc_entry->context_cache_gen == s->context_cache_gen) {
+VTD_DPRINTF(CACHE, "hit context-cache bus %d devfn %d "
+"(hi %"PRIx64 " lo %"PRIx64 " gen %"PRIu32 ")",
+bus_num, devfn, cc_entry->context_entry.hi,
+cc_entry->context_entry.lo, cc_entry->context_cache_gen);
+ce = cc_entry->context_entry;
+is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
+} else {
+ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
+is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
+if (ret_fr) {
+ret_fr = -ret_fr;
+if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
+VTD_DPRINTF(FLOG, "fault processing is disabled for DMA "
+"requests through this context-entry "
+"(with FPD Set)");
+} else {
+vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
+}
+return;
 }
-return;
+/* Update context-cache */
+VTD_DPRINTF(CACHE, "update context-cache bus %d devfn %d "
+"(hi %"PRIx64 " lo %"PRIx64 " gen

[Qemu-devel] [PATCH v4 6/8] intel-iommu: add supports for queued invalidation interface

2014-08-15 Thread Le Tan
Add supports for queued invalidation interface, an expended invalidation
interface with extended capabilities.

Signed-off-by: Le Tan 
---
 hw/i386/intel_iommu.c  | 373 -
 hw/i386/intel_iommu_internal.h |  27 ++-
 2 files changed, 393 insertions(+), 7 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 8e67e04..60dec4f 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -314,6 +314,41 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, 
uint16_t source_id,
 }
 }
 
+/* Handle Invalidation Queue Errors of queued invalidation interface error
+ * conditions.
+ */
+static void vtd_handle_inv_queue_error(IntelIOMMUState *s)
+{
+uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
+
+vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_IQE);
+vtd_generate_fault_event(s, fsts_reg);
+}
+
+/* Set the IWC field and try to generate an invalidation completion interrupt 
*/
+static void vtd_generate_completion_event(IntelIOMMUState *s)
+{
+VTD_DPRINTF(INV, "completes an invalidation wait command with "
+"Interrupt Flag");
+if (vtd_get_long_raw(s, DMAR_ICS_REG) & VTD_ICS_IWC) {
+VTD_DPRINTF(INV, "there is a previous interrupt condition to be "
+"serviced by software, "
+"new invalidation event is not generated");
+return;
+}
+vtd_set_clear_mask_long(s, DMAR_ICS_REG, 0, VTD_ICS_IWC);
+vtd_set_clear_mask_long(s, DMAR_IECTL_REG, 0, VTD_IECTL_IP);
+if (vtd_get_long_raw(s, DMAR_IECTL_REG) & VTD_IECTL_IM) {
+VTD_DPRINTF(INV, "IM filed in IECTL_REG is set, new invalidation "
+"event is not generated");
+return;
+} else {
+/* Generate the interrupt event */
+vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
+vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
+}
+}
+
 static inline bool vtd_root_entry_present(VTDRootEntry *root)
 {
 return root->val & VTD_ROOT_ENTRY_P;
@@ -759,6 +794,54 @@ static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, 
uint64_t val)
 return iaig;
 }
 
+static inline bool vtd_queued_inv_enable_check(IntelIOMMUState *s)
+{
+return s->iq_tail == 0;
+}
+
+static inline bool vtd_queued_inv_disable_check(IntelIOMMUState *s)
+{
+return s->qi_enabled && (s->iq_tail == s->iq_head) &&
+   (s->iq_last_desc_type == VTD_INV_DESC_WAIT);
+}
+
+static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
+{
+uint64_t iqa_val = vtd_get_quad_raw(s, DMAR_IQA_REG);
+
+VTD_DPRINTF(INV, "Queued Invalidation Enable %s", (en ? "on" : "off"));
+if (en) {
+if (vtd_queued_inv_enable_check(s)) {
+s->iq = iqa_val & VTD_IQA_IQA_MASK;
+/* 2^(x+8) entries */
+s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8);
+s->qi_enabled = true;
+VTD_DPRINTF(INV, "DMAR_IQA_REG 0x%"PRIx64, iqa_val);
+VTD_DPRINTF(INV, "Invalidation Queue addr 0x%"PRIx64 " size %d",
+s->iq, s->iq_size);
+/* Ok - report back to driver */
+vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES);
+} else {
+VTD_DPRINTF(GENERAL, "error: can't enable Queued Invalidation: "
+"tail %"PRIu16, s->iq_tail);
+}
+} else {
+if (vtd_queued_inv_disable_check(s)) {
+/* disable Queued Invalidation */
+vtd_set_quad_raw(s, DMAR_IQH_REG, 0);
+s->iq_head = 0;
+s->qi_enabled = false;
+/* Ok - report back to driver */
+vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_QIES, 0);
+} else {
+VTD_DPRINTF(GENERAL, "error: can't disable Queued Invalidation: "
+"head %"PRIu16 ", tail %"PRIu16
+", last_descriptor %"PRIu8,
+s->iq_head, s->iq_tail, s->iq_last_desc_type);
+}
+}
+}
+
 /* Set Root Table Pointer */
 static void vtd_handle_gcmd_srtp(IntelIOMMUState *s)
 {
@@ -804,6 +887,10 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
 /* Set/update the root-table pointer */
 vtd_handle_gcmd_srtp(s);
 }
+if (changed & VTD_GCMD_QIE) {
+/* Queued Invalidation Enable */
+vtd_handle_gcmd_qie(s, val & VTD_GCMD_QIE);
+}
 }
 
 /* Handle write to Context Command Register */
@@ -814,6 +901,11 @@ static void vtd_handle_ccmd_write(IntelIOMMUState *s)
 
 /* Context-cache invalidation request */
 if (val & VTD_CCMD_ICC) {
+if (s-

Re: [Qemu-devel] [PATCH v4 0/8] intel-iommu: introduce Intel IOMMU (VT-d) emulation to q35 chipset

2014-08-29 Thread Le Tan
Hi,

2014-08-29 5:12 GMT+08:00 Michael S. Tsirkin :
> On Sat, Aug 16, 2014 at 01:55:36PM +0800, Le Tan wrote:
>> Hi,
>>
>> These patches are intended to introduce Intel IOMMU (VT-d) emulation to q35
>> chipset. The major job in these patches is to add support for emulating Intel
>> IOMMU according to the VT-d specification, including basic responses to CSRs
>> accesses, the logics of DMAR (DMA remapping) and DMA memory address
>> translations.
>
> Thanks, I applied this in my tree.
> Will send upstream in the next pull request.
> I hope this happened soon enough for you to meet your timing
> requirements?

I am sorry that I am inconvenient to access the Internet these days,
so I can't response timely. There are no timing requirements for GSoC.
:)
Thanks very much!

Le

>
>> Features implemented for now are:
>> 1. Response to important CSRs accesses;
>> 2. DMAR (DMA remapping) without PASID support;
>> 3. Primary fault logging;
>> 4. Support both register-based and queued invalidation for IOTLB and context
>>cache invalidation;
>> 5. Add DMAR table to ACPI tables to expose VT-d to BIOS;
>> 6. Add "-machine iommu=on|off" option to enable/disable VT-d;
>> 7. Only one DMAR unit for all the devices of PCI Segment 0;
>> 8. Context-cache and IOTLB.
>>
>> Testing:
>> 1. L1 guest with Linux with intel_iommu=on can interact with VT-d and boot
>> smoothly, and there exists information about VT-d in the log of kernel;
>> 2. Run L1 with VT-d, L2 guest with Linux can boot smoothly withou PCI device
>> passthrough;
>> 3. Run L1 with VT-d and "-soundhw ac97 (QEMU_AUDIO_DRV=alsa)", then assign 
>> the
>> sound card to L2; L2 can boot smoothly with legacy PCI assignment and I can
>> hear the music played in L2 from the host speakers;
>> 4. Jailhouse hypervisor can run smoothly (tested by Jan).
>> 5. Run L1 with VT-d and e1000 network card, then assign e1000 to L2; L2 will 
>> be
>> STUCK when booting. This still remains unsolved now. As far as I know, I 
>> suppose
>> that the L2 crashes when doing e1000_probe(). The QEMU of L1 will dump
>> something with "KVM: entry failed, hardware error 0x0", and the KVM of host
>> will print "nested_vmx_exit_handled failed vm entry 7". Unlike assigning the
>> sound card, after being assigned to L2, there is no translation entry of 
>> e1000
>> through VT-d, which I think means that e1000 doesn't issue any DMA access 
>> during
>> the boot of L2. Sometimes the kernel of L2 will print "divide error" during
>> booting. Maybe it results from the lack of reset mechanism.
>> 6. VFIO is tested and is similar to legacy pci assignment.
>>
>> TODO:
>> 1. Fix the bug of legacy PCI assignment;
>> 2. Add unit test for DMAR ACPI table;
>> 3. Add support for PCIE-to-PCIE bridge.
>>
>> Changes since v3:
>> *address reviewing suggestions given by Jan and Michael
>> -implement Context-cache and IOTLB
>> -remove 'inline' keyword from most functions
>> -rename all the functions with prefix vtd_
>> -clean up constant definitions
>>
>> Changes since v2:
>> *address reviewing suggestions given by Jan
>> -add support for primary fault logging
>> -add support for queued invalidation
>>
>> Changes since v1:
>> *address reviewing suggestions given by Michael, Paolo, Stefan and Jan
>> -split intel_iommu.h to include/hw/i386/intel_iommu.h and
>>  hw/i386/intel_iommu_internal.h
>> -change the copyright information
>> -change D() to VTD_DPRINTF()
>> -remove dead code
>> -rename constant definitions with consistent prefix VTD_
>> -rename some struct definitions according to QEMU standard
>> -rename some CSRs access functions
>> -use endian-save functions to access CSRs
>> -change machine option to "iommu=on|off"
>>
>> Thanks very much!
>>
>> Git trees:
>> https://github.com/tamlok/qemu
>>
>> Le Tan (8):
>>   iommu: add is_write as a parameter to the translate function of
>> MemoryRegionIOMMUOps
>>   intel-iommu: introduce Intel IOMMU (VT-d) emulation
>>   intel-iommu: add DMAR table to ACPI tables
>>   intel-iommu: add Intel IOMMU emulation to q35 and add a machine option
>> "iommu" as a switch
>>   intel-iommu: fix coding style issues around in q35.c and machine.c
>>   intel-iommu: add supports for queued invalidation interface
>>   intel-iommu: add context-cache to cache context-entry
>>   intel-iommu: add IOTLB using hash table
>>
>>  exec.c   

[Qemu-devel] [PATCH 0/4] replace some fprintf(stderr, ...) with error_report()

2014-05-09 Thread Le Tan
This series of patches replaces some more occurrences of fprintf(stderr, ...)
with error_report() and removes the trailing "\n". Some of them are not changed
because I am not sure if they should be changed. There are some inconsistency
while dealing with macros that involves fprintf(stderr,...), that is, some of
them remain the same while some are changed to error_report().
I have 43 more patches (most of them touch files in hw/) and would send them
out once the first 4 patches are fine.

Le Tan (4):
  arch_init: replace fprintf(stderr,...) with error_report() in
arch_init.c
  audio: replace fprintf(stderr,...) with error_report() in audio
  block: replace fprintf(stderr,...) with error_report() in block/
  bsd-user: replace fprintf(stderr,...) with error_report()

 arch_init.c|   36 ---
 audio/spiceaudio.c |2 +-
 audio/wavcapture.c |4 +-
 block-migration.c  |4 +-
 block.c|4 +-
 block/linux-aio.c  |4 +-
 block/nbd-client.h |2 +-
 block/qcow2-cluster.c  |4 +-
 block/qcow2-refcount.c |  114 
 block/qcow2.c  |   16 +++
 block/quorum.c |4 +-
 block/raw-posix.c  |8 ++--
 block/raw-win32.c  |6 +--
 block/ssh.c|4 +-
 block/vdi.c|   14 +++---
 block/vmdk.c   |   21 -
 block/vpc.c|4 +-
 block/vvfat.c  |  108 ++---
 blockdev.c |6 +--
 bsd-user/bsdload.c |2 +-
 bsd-user/elfload.c |2 +-
 bsd-user/main.c|   14 +++---
 22 files changed, 189 insertions(+), 194 deletions(-)

-- 
1.7.9.5




[Qemu-devel] [PATCH 2/4] audio: replace fprintf(stderr, ...) with error_report() in audio

2014-05-09 Thread Le Tan
Replace fprintf(stderr,...) with error_report() in files audio/*.
The trailing "\n"s of the @fmt argument have been removed
because @fmt of error_report() should not contain newline.

Signed-off-by: Le Tan 
---
 audio/spiceaudio.c |2 +-
 audio/wavcapture.c |4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/audio/spiceaudio.c b/audio/spiceaudio.c
index fceee50..7b79bed 100644
--- a/audio/spiceaudio.c
+++ b/audio/spiceaudio.c
@@ -105,7 +105,7 @@ static int rate_get_samples (struct audio_pcm_info *info, 
SpiceRateCtl *rate)
 bytes = muldiv64 (ticks, info->bytes_per_second, get_ticks_per_sec ());
 samples = (bytes - rate->bytes_sent) >> info->shift;
 if (samples < 0 || samples > 65536) {
-fprintf (stderr, "Resetting rate control (%" PRId64 " samples)\n", 
samples);
+error_report("Resetting rate control (%" PRId64 " samples)", samples);
 rate_start (rate);
 samples = 0;
 }
diff --git a/audio/wavcapture.c b/audio/wavcapture.c
index 9d94623..60f7a72 100644
--- a/audio/wavcapture.c
+++ b/audio/wavcapture.c
@@ -63,8 +63,8 @@ static void wav_destroy (void *opaque)
 }
 doclose:
 if (fclose (wav->f)) {
-fprintf (stderr, "wav_destroy: fclose failed: %s",
- strerror (errno));
+error_report("wav_destroy: fclose failed: %s",
+ strerror (errno));
 }
 }
 
-- 
1.7.9.5




[Qemu-devel] [PATCH 1/4] arch_init: replace fprintf(stderr, ...) with error_report() in arch_init.c

2014-05-09 Thread Le Tan
Replace fprintf(stderr,...) with error_report() in the file
arch_init.c. The trailing "\n"s of the @fmt argument have been removed
because @fmt of error_report() should not contain newline.

Signed-off-by: Le Tan 
---
 arch_init.c |   36 +---
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index 60c975d..0b41475 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -921,12 +921,12 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void 
*host)
 xh_len = qemu_get_be16(f);
 
 if (xh_flags != ENCODING_FLAG_XBZRLE) {
-fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
+error_report("Failed to load XBZRLE page - wrong compression!");
 return -1;
 }
 
 if (xh_len > TARGET_PAGE_SIZE) {
-fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
+error_report("Failed to load XBZRLE page - len overflow!");
 return -1;
 }
 /* load data and decode */
@@ -936,11 +936,11 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void 
*host)
 ret = xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
TARGET_PAGE_SIZE);
 if (ret == -1) {
-fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
+error_report("Failed to load XBZRLE page - decode error!");
 rc = -1;
 } else  if (ret > TARGET_PAGE_SIZE) {
-fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
-ret, TARGET_PAGE_SIZE);
+error_report("Failed to load XBZRLE page - size %d exceeds %d!",
+ ret, TARGET_PAGE_SIZE);
 abort();
 }
 
@@ -957,7 +957,7 @@ static inline void *host_from_stream_offset(QEMUFile *f,
 
 if (flags & RAM_SAVE_FLAG_CONTINUE) {
 if (!block) {
-fprintf(stderr, "Ack, bad migration stream!\n");
+error_report("Ack, bad migration stream!");
 return NULL;
 }
 
@@ -973,7 +973,7 @@ static inline void *host_from_stream_offset(QEMUFile *f,
 return memory_region_get_ram_ptr(block->mr) + offset;
 }
 
-fprintf(stderr, "Can't find block %s!\n", id);
+error_report("Can't find block %s!", id);
 return NULL;
 }
 
@@ -1026,10 +1026,10 @@ static int ram_load(QEMUFile *f, void *opaque, int 
version_id)
 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
 if (!strncmp(id, block->idstr, sizeof(id))) {
 if (block->length != length) {
-fprintf(stderr,
-"Length mismatch: %s: " RAM_ADDR_FMT
-" in != " RAM_ADDR_FMT "\n", id, 
length,
-block->length);
+error_report(
+ "Length mismatch: %s: " 
RAM_ADDR_FMT
+ " in != " RAM_ADDR_FMT, id, 
length,
+ block->length);
 ret =  -EINVAL;
 goto done;
 }
@@ -1038,8 +1038,8 @@ static int ram_load(QEMUFile *f, void *opaque, int 
version_id)
 }
 
 if (!block) {
-fprintf(stderr, "Unknown ramblock \"%s\", cannot "
-"accept migration\n", id);
+error_report("Unknown ramblock \"%s\", cannot "
+ "accept migration", id);
 ret = -EINVAL;
 goto done;
 }
@@ -1186,12 +1186,10 @@ void select_soundhw(const char *optarg)
 
 if (!c->name) {
 if (l > 80) {
-fprintf(stderr,
-"Unknown sound card name (too big to show)\n");
+error_report("Unknown sound card name (too big to show)");
 }
 else {
-fprintf(stderr, "Unknown sound card name `%.*s'\n",
-(int) l, p);
+error_report("Unknown sound card name `%.*s'", (int) l, p);
 }
 bad_card = 1;
 }
@@ -1214,13 +1212,13 @@ void audio_init(void)
 if (c->enabled) {
 if (c->isa) {
 if (!isa_bus) {
-fprintf(stderr, "ISA bus not available for %s\n", c->name);
+err

[Qemu-devel] [PATCH 4/4] bsd-user: replace fprintf(stderr, ...) with error_report()

2014-05-09 Thread Le Tan
Replace fprintf(stderr,...) with error_report() in files bsd-user/*.
The trailing "\n"s of the @fmt argument have been removed
because @fmt of error_report() should not contain newline.

Signed-off-by: Le Tan 
---
 bsd-user/bsdload.c |2 +-
 bsd-user/elfload.c |2 +-
 bsd-user/main.c|   14 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/bsd-user/bsdload.c b/bsd-user/bsdload.c
index 2abc713..6b52e08 100644
--- a/bsd-user/bsdload.c
+++ b/bsd-user/bsdload.c
@@ -183,7 +183,7 @@ int loader_exec(const char * filename, char ** argv, char 
** envp,
 && bprm.buf[3] == 'F') {
 retval = load_elf_binary(&bprm,regs,infop);
 } else {
-fprintf(stderr, "Unknown binary format\n");
+error_report("Unknown binary format");
 return -1;
 }
 }
diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index 93fd9e4..95652b1 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -628,7 +628,7 @@ static abi_ulong copy_elf_strings(int argc,char ** argv, 
void **page,
 while (argc-- > 0) {
 tmp = argv[argc];
 if (!tmp) {
-fprintf(stderr, "VFS: argc is wrong");
+error_report("VFS: argc is wrong");
 exit(-1);
 }
 tmp1 = tmp;
diff --git a/bsd-user/main.c b/bsd-user/main.c
index f81ba55..3825e76 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -378,8 +378,8 @@ void cpu_loop(CPUX86State *env)
 #endif
 default:
 pc = env->segs[R_CS].base + env->eip;
-fprintf(stderr, "qemu: 0x%08lx: unhandled CPU exception 0x%x - 
aborting\n",
-(long)pc, trapnr);
+error_report("qemu: 0x%08lx: unhandled CPU exception 0x%x - 
aborting",
+ (long)pc, trapnr);
 abort();
 }
 process_pending_signals(env);
@@ -752,7 +752,7 @@ int main(int argc, char **argv)
 module_call_init(MODULE_INIT_QOM);
 
 if ((envlist = envlist_create()) == NULL) {
-(void) fprintf(stderr, "Unable to allocate envlist\n");
+(void) error_report("Unable to allocate envlist");
 exit(1);
 }
 
@@ -794,7 +794,7 @@ int main(int argc, char **argv)
 } else if (!strcmp(r, "ignore-environment")) {
 envlist_free(envlist);
 if ((envlist = envlist_create()) == NULL) {
-(void) fprintf(stderr, "Unable to allocate envlist\n");
+(void) error_report("Unable to allocate envlist");
 exit(1);
 }
 } else if (!strcmp(r, "U")) {
@@ -816,7 +816,7 @@ int main(int argc, char **argv)
 qemu_host_page_size = atoi(argv[optind++]);
 if (qemu_host_page_size == 0 ||
 (qemu_host_page_size & (qemu_host_page_size - 1)) != 0) {
-fprintf(stderr, "page size must be a power of two\n");
+error_report("page size must be a power of two");
 exit(1);
 }
 } else if (!strcmp(r, "g")) {
@@ -910,7 +910,7 @@ int main(int argc, char **argv)
qemu_host_page_size */
 env = cpu_init(cpu_model);
 if (!env) {
-fprintf(stderr, "Unable to find CPU definition\n");
+error_report("Unable to find CPU definition");
 exit(1);
 }
 cpu = ENV_GET_CPU(env);
@@ -1014,7 +1014,7 @@ int main(int argc, char **argv)
 #ifndef TARGET_ABI32
 /* enable 64 bit mode if possible */
 if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
-fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
+error_report("The selected x86 CPU does not support 64 bit mode");
 exit(1);
 }
 env->cr[4] |= CR4_PAE_MASK;
-- 
1.7.9.5




[Qemu-devel] [PATCH 3/4] block: replace fprintf(stderr, ...) with error_report() in block/

2014-05-09 Thread Le Tan
Replace fprintf(stderr,...) with error_report() in files block/*, block.c,
block-migration.c and blockdev.c. The trailing "\n"s of the @fmt argument
have been removed because @fmt of error_report() should not contain newline.

Signed-off-by: Le Tan 
---
 block-migration.c  |4 +-
 block.c|4 +-
 block/linux-aio.c  |4 +-
 block/nbd-client.h |2 +-
 block/qcow2-cluster.c  |4 +-
 block/qcow2-refcount.c |  114 
 block/qcow2.c  |   16 +++
 block/quorum.c |4 +-
 block/raw-posix.c  |8 ++--
 block/raw-win32.c  |6 +--
 block/ssh.c|4 +-
 block/vdi.c|   14 +++---
 block/vmdk.c   |   21 -
 block/vpc.c|4 +-
 block/vvfat.c  |  108 ++---
 blockdev.c |6 +--
 16 files changed, 160 insertions(+), 163 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index 56951e0..5bcf7c8 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -790,7 +790,7 @@ static int block_load(QEMUFile *f, void *opaque, int 
version_id)
 
 bs = bdrv_find(device_name);
 if (!bs) {
-fprintf(stderr, "Error unknown block device %s\n",
+error_report("Error unknown block device %s",
 device_name);
 return -EINVAL;
 }
@@ -833,7 +833,7 @@ static int block_load(QEMUFile *f, void *opaque, int 
version_id)
(addr == 100) ? '\n' : '\r');
 fflush(stdout);
 } else if (!(flags & BLK_MIG_FLAG_EOS)) {
-fprintf(stderr, "Unknown block migration flags: %#x\n", flags);
+error_report("Unknown block migration flags: %#x", flags);
 return -EINVAL;
 }
 ret = qemu_file_get_error(f);
diff --git a/block.c b/block.c
index b749d31..7dc4acb 100644
--- a/block.c
+++ b/block.c
@@ -2694,8 +2694,8 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t 
offset,
  * if it has been enabled.
  */
 if (bs->io_limits_enabled) {
-fprintf(stderr, "Disabling I/O throttling on '%s' due "
-"to synchronous I/O.\n", bdrv_get_device_name(bs));
+error_report("Disabling I/O throttling on '%s' due "
+ "to synchronous I/O.", bdrv_get_device_name(bs));
 bdrv_io_limits_disable(bs);
 }
 
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 53434e2..b706a59 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -162,8 +162,8 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void 
*aio_ctx, int fd,
break;
 /* Currently Linux kernel does not support other operations */
 default:
-fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
-__func__, type);
+error_report("%s: invalid AIO request type 0x%x.",
+ __func__, type);
 goto out_free_aiocb;
 }
 io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
diff --git a/block/nbd-client.h b/block/nbd-client.h
index f2a6337..74178f4 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -9,7 +9,7 @@
 
 #if defined(DEBUG_NBD)
 #define logout(fmt, ...) \
-fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
+error_report("nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
 #else
 #define logout(fmt, ...) ((void)0)
 #endif
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 76d2bcf..b1c8daf 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -67,8 +67,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t 
min_size,
 }
 
 #ifdef DEBUG_ALLOC2
-fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n",
-s->l1_size, new_l1_size);
+error_report("grow l1_table from %d to %" PRId64,
+ s->l1_size, new_l1_size);
 #endif
 
 new_l1_size2 = sizeof(uint64_t) * new_l1_size;
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index e79895d..e494474 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -219,9 +219,9 @@ static int alloc_refcount_block(BlockDriverState *bs,
 }
 
 #ifdef DEBUG_ALLOC2
-fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
-" at %" PRIx64 "\n",
-refcount_table_index, cluster_index << s->cluster_bits, new_block);
+error_report("qcow2: Allocate refcount block %d for %" PRIx64
+ " at %" PRIx64,
+ refcount_table_index, cluster_index << s->cluster_bits, 
new_block);
 #endif
 
 if (in_same_refcount_block(s, new_block, cluster_i

Re: [Qemu-devel] [PATCH 3/4] block: replace fprintf(stderr, ...) with error_report() in block/

2014-05-11 Thread Le Tan
2014-05-10 21:18 GMT+08:00 Peter Crosthwaite :
> On Sat, May 10, 2014 at 9:55 AM, Le Tan  wrote:
>> Replace fprintf(stderr,...) with error_report() in files block/*, block.c,
>> block-migration.c and blockdev.c. The trailing "\n"s of the @fmt argument
>> have been removed because @fmt of error_report() should not contain newline.
>>
>> Signed-off-by: Le Tan 
>> ---
>>  block-migration.c  |4 +-
>>  block.c|4 +-
>>  block/linux-aio.c  |4 +-
>>  block/nbd-client.h |2 +-
>>  block/qcow2-cluster.c  |4 +-
>>  block/qcow2-refcount.c |  114 
>> 
>>  block/qcow2.c  |   16 +++
>>  block/quorum.c |4 +-
>>  block/raw-posix.c  |8 ++--
>>  block/raw-win32.c  |6 +--
>>  block/ssh.c|4 +-
>>  block/vdi.c|   14 +++---
>>  block/vmdk.c   |   21 -
>>  block/vpc.c|4 +-
>>  block/vvfat.c  |  108 ++---
>>  blockdev.c |6 +--
>>  16 files changed, 160 insertions(+), 163 deletions(-)
>>
>> diff --git a/block-migration.c b/block-migration.c
>> index 56951e0..5bcf7c8 100644
>> --- a/block-migration.c
>> +++ b/block-migration.c
>> @@ -790,7 +790,7 @@ static int block_load(QEMUFile *f, void *opaque, int 
>> version_id)
>>
>>  bs = bdrv_find(device_name);
>>  if (!bs) {
>> -fprintf(stderr, "Error unknown block device %s\n",
>> +error_report("Error unknown block device %s",
>>  device_name);
>>  return -EINVAL;
>>  }
>> @@ -833,7 +833,7 @@ static int block_load(QEMUFile *f, void *opaque, int 
>> version_id)
>> (addr == 100) ? '\n' : '\r');
>>  fflush(stdout);
>>  } else if (!(flags & BLK_MIG_FLAG_EOS)) {
>> -fprintf(stderr, "Unknown block migration flags: %#x\n", flags);
>> +error_report("Unknown block migration flags: %#x", flags);
>>  return -EINVAL;
>>  }
>>  ret = qemu_file_get_error(f);
>> diff --git a/block.c b/block.c
>> index b749d31..7dc4acb 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -2694,8 +2694,8 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t 
>> offset,
>>   * if it has been enabled.
>>   */
>>  if (bs->io_limits_enabled) {
>> -fprintf(stderr, "Disabling I/O throttling on '%s' due "
>> -"to synchronous I/O.\n", bdrv_get_device_name(bs));
>> +error_report("Disabling I/O throttling on '%s' due "
>> + "to synchronous I/O.", bdrv_get_device_name(bs));
>>  bdrv_io_limits_disable(bs);
>>  }
>>
>> diff --git a/block/linux-aio.c b/block/linux-aio.c
>> index 53434e2..b706a59 100644
>> --- a/block/linux-aio.c
>> +++ b/block/linux-aio.c
>> @@ -162,8 +162,8 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void 
>> *aio_ctx, int fd,
>> break;
>>  /* Currently Linux kernel does not support other operations */
>>  default:
>> -fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
>> -__func__, type);
>> +error_report("%s: invalid AIO request type 0x%x.",
>> + __func__, type);
>>  goto out_free_aiocb;
>>  }
>>  io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
>> diff --git a/block/nbd-client.h b/block/nbd-client.h
>> index f2a6337..74178f4 100644
>> --- a/block/nbd-client.h
>> +++ b/block/nbd-client.h
>> @@ -9,7 +9,7 @@
>>
>>  #if defined(DEBUG_NBD)
>>  #define logout(fmt, ...) \
>> -fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
>> +error_report("nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
>
> So i'm not sure we want to convert debug printfery to error_report.
> There's very good value in converting the printfs with user
> visibility, but ones like this seem intended for developers only as
> throwaway-output. My thinking is that this is a lower level output
> than error_report. For instance, as a developer you may do something
> to break your error API yet you still want your debug printfery.
>

[Qemu-devel] [PATCH 0/2] replace some fprintf(stderr, ...) with error_report()

2014-05-20 Thread Le Tan
This series of patches replaces some more occurrences of fprintf(stderr, ...)
with error_report() and removes the trailing "\n". Those for debugging are not
changed.

Le Tan (2):
  arch_init: replace fprintf(stderr,...) with error_report()
  block: replace fprintf(stderr,...) with error_report()

 arch_init.c|   32 +++---
 block-migration.c  |5 +--
 block.c|4 +-
 block/qcow2-refcount.c |  114 
 block/qcow2.c  |   18 
 block/raw-posix.c  |   10 ++---
 block/raw-win32.c  |6 +--
 block/ssh.c|2 +-
 block/vdi.c|   15 ---
 block/vmdk.c   |   15 +++
 block/vpc.c|4 +-
 block/vvfat.c  |   69 +++--
 blockdev.c |6 +--
 13 files changed, 147 insertions(+), 153 deletions(-)

-- 
1.7.9.5




[Qemu-devel] [PATCH 2/2] block: replace fprintf(stderr, ...) with error_report()

2014-05-20 Thread Le Tan
Replace fprintf(stderr,...) with error_report() in files block/*, block.c,
block-migration.c and blockdev.c. The trailing "\n"s of the @fmt argument
have been removed because @fmt of error_report() should not contain newline.

Signed-off-by: Le Tan 
---
 block-migration.c  |5 +--
 block.c|4 +-
 block/qcow2-refcount.c |  114 
 block/qcow2.c  |   18 
 block/raw-posix.c  |   10 ++---
 block/raw-win32.c  |6 +--
 block/ssh.c|2 +-
 block/vdi.c|   15 ---
 block/vmdk.c   |   15 +++
 block/vpc.c|4 +-
 block/vvfat.c  |   69 +++--
 blockdev.c |6 +--
 12 files changed, 132 insertions(+), 136 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index 56951e0..1660565 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -790,8 +790,7 @@ static int block_load(QEMUFile *f, void *opaque, int 
version_id)
 
 bs = bdrv_find(device_name);
 if (!bs) {
-fprintf(stderr, "Error unknown block device %s\n",
-device_name);
+error_report("Error unknown block device %s", device_name);
 return -EINVAL;
 }
 
@@ -833,7 +832,7 @@ static int block_load(QEMUFile *f, void *opaque, int 
version_id)
(addr == 100) ? '\n' : '\r');
 fflush(stdout);
 } else if (!(flags & BLK_MIG_FLAG_EOS)) {
-fprintf(stderr, "Unknown block migration flags: %#x\n", flags);
+error_report("Unknown block migration flags: %#x", flags);
 return -EINVAL;
 }
 ret = qemu_file_get_error(f);
diff --git a/block.c b/block.c
index c90c71a..14f581b 100644
--- a/block.c
+++ b/block.c
@@ -2698,8 +2698,8 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t 
offset,
  * if it has been enabled.
  */
 if (bs->io_limits_enabled) {
-fprintf(stderr, "Disabling I/O throttling on '%s' due "
-"to synchronous I/O.\n", bdrv_get_device_name(bs));
+error_report("Disabling I/O throttling on '%s' due "
+ "to synchronous I/O.", bdrv_get_device_name(bs));
 bdrv_io_limits_disable(bs);
 }
 
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 9507aef..23d44b0 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -795,7 +795,7 @@ void qcow2_free_clusters(BlockDriverState *bs,
 BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
 ret = update_refcount(bs, offset, size, -1, type);
 if (ret < 0) {
-fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
+error_report("qcow2_free_clusters failed: %s", strerror(-ret));
 /* TODO Remember the clusters to free them later and avoid leaking */
 }
 }
@@ -1040,14 +1040,14 @@ static void inc_refcounts(BlockDriverState *bs,
 cluster_offset += s->cluster_size) {
 k = cluster_offset >> s->cluster_bits;
 if (k >= refcount_table_size) {
-fprintf(stderr, "Warning: cluster offset=0x%" PRIx64 " is after "
-"the end of the image file, can't properly check refcounts.\n",
-cluster_offset);
+error_report("Warning: cluster offset=0x%" PRIx64 " is after "
+ "the end of the image file, can't properly check 
refcounts.",
+ cluster_offset);
 res->check_errors++;
 } else {
 if (++refcount_table[k] == 0) {
-fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
-"\n", cluster_offset);
+error_report("ERROR: overflow cluster offset=0x%" PRIx64,
+ cluster_offset);
 res->corruptions++;
 }
 }
@@ -1091,9 +1091,9 @@ static int check_refcounts_l2(BlockDriverState *bs, 
BdrvCheckResult *res,
 case QCOW2_CLUSTER_COMPRESSED:
 /* Compressed clusters don't have QCOW_OFLAG_COPIED */
 if (l2_entry & QCOW_OFLAG_COPIED) {
-fprintf(stderr, "ERROR: cluster %" PRId64 ": "
-"copied flag must never be set for compressed "
-"clusters\n", l2_entry >> s->cluster_bits);
+error_report("ERROR: cluster %" PRId64 ": "
+ "copied flag must never be set for compressed "
+ "clusters", l2_entry >> s->clus

[Qemu-devel] [PATCH 1/2] arch_init: replace fprintf(stderr, ...) with error_report()

2014-05-20 Thread Le Tan
Replace fprintf(stderr,...) with error_report() in the file
arch_init.c. The trailing "\n"s of the @fmt argument have been removed
because @fmt of error_report() should not contain newline.

Signed-off-by: Le Tan 
---
 arch_init.c |   32 +++-
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index 685ba0e..9f1a174 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -975,12 +975,12 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void 
*host)
 xh_len = qemu_get_be16(f);
 
 if (xh_flags != ENCODING_FLAG_XBZRLE) {
-fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
+error_report("Failed to load XBZRLE page - wrong compression!");
 return -1;
 }
 
 if (xh_len > TARGET_PAGE_SIZE) {
-fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
+error_report("Failed to load XBZRLE page - len overflow!");
 return -1;
 }
 /* load data and decode */
@@ -989,7 +989,7 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void 
*host)
 /* decode RLE */
 if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
  TARGET_PAGE_SIZE) == -1) {
-fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
+error_report("Failed to load XBZRLE page - decode error!");
 return -1;
 }
 
@@ -1006,7 +1006,7 @@ static inline void *host_from_stream_offset(QEMUFile *f,
 
 if (flags & RAM_SAVE_FLAG_CONTINUE) {
 if (!block) {
-fprintf(stderr, "Ack, bad migration stream!\n");
+error_report("Ack, bad migration stream!");
 return NULL;
 }
 
@@ -1022,7 +1022,7 @@ static inline void *host_from_stream_offset(QEMUFile *f,
 return memory_region_get_ram_ptr(block->mr) + offset;
 }
 
-fprintf(stderr, "Can't find block %s!\n", id);
+error_report("Can't find block %s!", id);
 return NULL;
 }
 
@@ -1075,10 +1075,9 @@ static int ram_load(QEMUFile *f, void *opaque, int 
version_id)
 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
 if (!strncmp(id, block->idstr, sizeof(id))) {
 if (block->length != length) {
-fprintf(stderr,
-"Length mismatch: %s: " RAM_ADDR_FMT
-" in != " RAM_ADDR_FMT "\n", id, length,
-block->length);
+error_report("Length mismatch: %s: " RAM_ADDR_FMT
+ " in != " RAM_ADDR_FMT, id, length,
+ block->length);
 ret =  -EINVAL;
 goto done;
 }
@@ -1087,8 +1086,8 @@ static int ram_load(QEMUFile *f, void *opaque, int 
version_id)
 }
 
 if (!block) {
-fprintf(stderr, "Unknown ramblock \"%s\", cannot "
-"accept migration\n", id);
+error_report("Unknown ramblock \"%s\", cannot "
+ "accept migration", id);
 ret = -EINVAL;
 goto done;
 }
@@ -1243,12 +1242,11 @@ void select_soundhw(const char *optarg)
 
 if (!c->name) {
 if (l > 80) {
-fprintf(stderr,
-"Unknown sound card name (too big to show)\n");
+error_report("Unknown sound card name (too big to show)");
 }
 else {
-fprintf(stderr, "Unknown sound card name `%.*s'\n",
-(int) l, p);
+error_report("Unknown sound card name `%.*s'",
+ (int) l, p);
 }
 bad_card = 1;
 }
@@ -1271,13 +1269,13 @@ void audio_init(void)
 if (c->enabled) {
 if (c->isa) {
 if (!isa_bus) {
-fprintf(stderr, "ISA bus not available for %s\n", c->name);
+error_report("ISA bus not available for %s", c->name);
 exit(1);
 }
 c->init.init_isa(isa_bus);
 } else {
 if (!pci_bus) {
-fprintf(stderr, "PCI bus not available for %s\n", c->name);
+error_report("PCI bus not available for %s", c->name);
 exit(1);
 }
 c->init.init_pci(pci_bus);
-- 
1.7.9.5




[Qemu-devel] GSoC student self-introduction

2014-05-20 Thread Le Tan
Hi, I am a student participating in GSoC 2014. My brief introduction
is given below. I am looking forward to studying QEMU and finishing
the GSoC project with the community. Thank you very much!

Name: Le Tan
IRC nick: #tamlok
Public git repo URL: https://github.com/tamlok/qemu.git
Project: Intel IOMMU (VT-d) Emulation
Summary: I will work on adding supports for emulating Intel IOMMUs
according to Intel VT-d specification, based on an existing VT-d
framework. This project requires me to implement the VT-d emulation
and add the minimally required VT-d features for q35 chipset to the
IOMMU framework in QEMU. I will concentrate on finishing this project
with the help of my mentor and the community this summer.

Regards,
Le Tan



Re: [Qemu-devel] [PATCH 1/4] arch_init: replace fprintf(stderr, ...) with error_report() in arch_init.c

2014-05-23 Thread Le Tan
There is a newer version of patch to fix this problem. I forgot to add
the version number when I sent the new patch. Add this comment just to
indicate that this patch should be dropped. :)

2014-05-10 7:55 GMT+08:00 Le Tan :
> Replace fprintf(stderr,...) with error_report() in the file
> arch_init.c. The trailing "\n"s of the @fmt argument have been removed
> because @fmt of error_report() should not contain newline.
>
> Signed-off-by: Le Tan 
> ---
>  arch_init.c |   36 +---
>  1 file changed, 17 insertions(+), 19 deletions(-)
>
> diff --git a/arch_init.c b/arch_init.c
> index 60c975d..0b41475 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -921,12 +921,12 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, 
> void *host)
>  xh_len = qemu_get_be16(f);
>
>  if (xh_flags != ENCODING_FLAG_XBZRLE) {
> -fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
> +error_report("Failed to load XBZRLE page - wrong compression!");
>  return -1;
>  }
>
>  if (xh_len > TARGET_PAGE_SIZE) {
> -fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
> +error_report("Failed to load XBZRLE page - len overflow!");
>  return -1;
>  }
>  /* load data and decode */
> @@ -936,11 +936,11 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, 
> void *host)
>  ret = xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
> TARGET_PAGE_SIZE);
>  if (ret == -1) {
> -fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
> +error_report("Failed to load XBZRLE page - decode error!");
>  rc = -1;
>  } else  if (ret > TARGET_PAGE_SIZE) {
> -fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
> -ret, TARGET_PAGE_SIZE);
> +error_report("Failed to load XBZRLE page - size %d exceeds %d!",
> + ret, TARGET_PAGE_SIZE);
>  abort();
>  }
>
> @@ -957,7 +957,7 @@ static inline void *host_from_stream_offset(QEMUFile *f,
>
>  if (flags & RAM_SAVE_FLAG_CONTINUE) {
>  if (!block) {
> -fprintf(stderr, "Ack, bad migration stream!\n");
> +error_report("Ack, bad migration stream!");
>  return NULL;
>  }
>
> @@ -973,7 +973,7 @@ static inline void *host_from_stream_offset(QEMUFile *f,
>  return memory_region_get_ram_ptr(block->mr) + offset;
>  }
>
> -fprintf(stderr, "Can't find block %s!\n", id);
> +error_report("Can't find block %s!", id);
>  return NULL;
>  }
>
> @@ -1026,10 +1026,10 @@ static int ram_load(QEMUFile *f, void *opaque, int 
> version_id)
>  QTAILQ_FOREACH(block, &ram_list.blocks, next) {
>  if (!strncmp(id, block->idstr, sizeof(id))) {
>  if (block->length != length) {
> -fprintf(stderr,
> -"Length mismatch: %s: " RAM_ADDR_FMT
> -" in != " RAM_ADDR_FMT "\n", id, 
> length,
> -block->length);
> +error_report(
> + "Length mismatch: %s: " 
> RAM_ADDR_FMT
> + " in != " RAM_ADDR_FMT, id, 
> length,
> + block->length);
>  ret =  -EINVAL;
>  goto done;
>  }
> @@ -1038,8 +1038,8 @@ static int ram_load(QEMUFile *f, void *opaque, int 
> version_id)
>  }
>
>  if (!block) {
> -fprintf(stderr, "Unknown ramblock \"%s\", cannot "
> -"accept migration\n", id);
> +error_report("Unknown ramblock \"%s\", cannot "
> + "accept migration", id);
>  ret = -EINVAL;
>  goto done;
>  }
> @@ -1186,12 +1186,10 @@ void select_soundhw(const char *optarg)
>
>  if (!c->name) {
>  if (l > 80) {
> -fprintf(stderr,
> -"Unknown sound card name (too big to 

Re: [Qemu-devel] [PATCH 3/4] block: replace fprintf(stderr, ...) with error_report() in block/

2014-05-23 Thread Le Tan
There is a newer version of patch to fix this problem. I forgot to add
the version number when I sent the new patch. Add this comment just to
indicate that this patch should be dropped. :)

2014-05-10 21:18 GMT+08:00 Peter Crosthwaite :
> On Sat, May 10, 2014 at 9:55 AM, Le Tan  wrote:
>> Replace fprintf(stderr,...) with error_report() in files block/*, block.c,
>> block-migration.c and blockdev.c. The trailing "\n"s of the @fmt argument
>> have been removed because @fmt of error_report() should not contain newline.
>>
>> Signed-off-by: Le Tan 
>> ---
>>  block-migration.c  |4 +-
>>  block.c|4 +-
>>  block/linux-aio.c  |4 +-
>>  block/nbd-client.h |2 +-
>>  block/qcow2-cluster.c  |4 +-
>>  block/qcow2-refcount.c |  114 
>> 
>>  block/qcow2.c  |   16 +++
>>  block/quorum.c |4 +-
>>  block/raw-posix.c  |8 ++--
>>  block/raw-win32.c  |6 +--
>>  block/ssh.c|4 +-
>>  block/vdi.c|   14 +++---
>>  block/vmdk.c   |   21 -
>>  block/vpc.c|4 +-
>>  block/vvfat.c  |  108 ++---
>>  blockdev.c |6 +--
>>  16 files changed, 160 insertions(+), 163 deletions(-)
>>
>> diff --git a/block-migration.c b/block-migration.c
>> index 56951e0..5bcf7c8 100644
>> --- a/block-migration.c
>> +++ b/block-migration.c
>> @@ -790,7 +790,7 @@ static int block_load(QEMUFile *f, void *opaque, int 
>> version_id)
>>
>>  bs = bdrv_find(device_name);
>>  if (!bs) {
>> -fprintf(stderr, "Error unknown block device %s\n",
>> +error_report("Error unknown block device %s",
>>  device_name);
>>  return -EINVAL;
>>  }
>> @@ -833,7 +833,7 @@ static int block_load(QEMUFile *f, void *opaque, int 
>> version_id)
>> (addr == 100) ? '\n' : '\r');
>>  fflush(stdout);
>>  } else if (!(flags & BLK_MIG_FLAG_EOS)) {
>> -fprintf(stderr, "Unknown block migration flags: %#x\n", flags);
>> +error_report("Unknown block migration flags: %#x", flags);
>>  return -EINVAL;
>>  }
>>  ret = qemu_file_get_error(f);
>> diff --git a/block.c b/block.c
>> index b749d31..7dc4acb 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -2694,8 +2694,8 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t 
>> offset,
>>   * if it has been enabled.
>>   */
>>  if (bs->io_limits_enabled) {
>> -fprintf(stderr, "Disabling I/O throttling on '%s' due "
>> -"to synchronous I/O.\n", bdrv_get_device_name(bs));
>> +error_report("Disabling I/O throttling on '%s' due "
>> + "to synchronous I/O.", bdrv_get_device_name(bs));
>>  bdrv_io_limits_disable(bs);
>>  }
>>
>> diff --git a/block/linux-aio.c b/block/linux-aio.c
>> index 53434e2..b706a59 100644
>> --- a/block/linux-aio.c
>> +++ b/block/linux-aio.c
>> @@ -162,8 +162,8 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void 
>> *aio_ctx, int fd,
>> break;
>>  /* Currently Linux kernel does not support other operations */
>>  default:
>> -fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
>> -__func__, type);
>> +error_report("%s: invalid AIO request type 0x%x.",
>> + __func__, type);
>>  goto out_free_aiocb;
>>  }
>>  io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
>> diff --git a/block/nbd-client.h b/block/nbd-client.h
>> index f2a6337..74178f4 100644
>> --- a/block/nbd-client.h
>> +++ b/block/nbd-client.h
>> @@ -9,7 +9,7 @@
>>
>>  #if defined(DEBUG_NBD)
>>  #define logout(fmt, ...) \
>> -fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
>> +error_report("nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
>
> So i'm not sure we want to convert debug printfery to error_report.
> There's very good value in converting the printfs with user
> visibility, but ones like this seem intended for developers only as
> throwaway-output. My thinking is that this is a lower level output
> than error_report. For instance, as a developer you may do something
> to break your error API yet you still want your debug printfery.
> Wouldn't matter in this location, but there may be other parts of the
> tree where we don't want error_report relinace for debug
> instrumentation and it just seems better to keep it consistent.
>
> Thinking further afield, qemu_log may ultimately be the correct
> mechanism for this instead (I think thats what I have been using for
> new code recently anyway).
>
> Thoughts from others?
>
> Regards,
> Peter



Re: [Qemu-devel] [Qemu-trivial] [PATCH 2/2] block: replace fprintf(stderr, ...) with error_report()

2014-05-23 Thread Le Tan
I will fix the whitespace issues and resend this series with a new
version later.
Thanks very much! ;)

Le

2014-05-24 4:27 GMT+08:00 Michael Tokarev :
> 21.05.2014 04:10, Le Tan wrote:
>> Replace fprintf(stderr,...) with error_report() in files block/*, block.c,
>> block-migration.c and blockdev.c. The trailing "\n"s of the @fmt argument
>> have been removed because @fmt of error_report() should not contain newline.
>>
>> Signed-off-by: Le Tan 
>> ---
>>  block-migration.c  |5 +--
>>  block.c|4 +-
>>  block/qcow2-refcount.c |  114 
>> 
>>  block/qcow2.c  |   18 
>>  block/raw-posix.c  |   10 ++---
>>  block/raw-win32.c  |6 +--
>>  block/ssh.c|2 +-
>>  block/vdi.c|   15 ---
>>  block/vmdk.c   |   15 +++
>>  block/vpc.c|4 +-
>>  block/vvfat.c  |   69 +++--
>>  blockdev.c |6 +--
>>  12 files changed, 132 insertions(+), 136 deletions(-)
>
> That's a rather large -trivial patch... ;)
>
> Please check this patch using ./tools/checkpatch.pl - it will show
> many whitespace issues.
>
> I'd apply it to -trivial (after fixing the whitespace issues), all
> the actual changes look sane.  But Cc'ing kwolf and stefanha too
> (who are the block subsystem maintainers), because the patch touches
> too many of block code.
>
> Thanks,
>
> /mjt
>



Re: [Qemu-devel] [Qemu-trivial] [PATCH v3] block: replace fprintf(stderr, ...) with error_report()

2014-05-25 Thread Le Tan
Oh, yes, I may misunderstood it. I just ran ./scripts/checkpatch.pl to
check files that I touched and fixed those I can fix. So I will send
the v4 of this patch which just fix the whitespace issues based on v2.
Is that right? Forgive my being naive. :)
Thanks very much for all!

Le

2014-05-25 22:36 GMT+08:00 Michael Tokarev :
> 25.05.2014 18:29, Jan Kiszka wrote:
>> On 2014-05-25 10:44, Le Tan wrote:
>>> Replace fprintf(stderr,...) with error_report() in files block/*, block.c,
>>> block-migration.c and blockdev.c. The trailing "\n"s of the @fmt argument
>>> have been removed because @fmt of error_report() should not contain newline.
>>> Also fix some coding style issues.
>>
>> Let's do the "also" part in a separate patch. It's more than one or two
>> trivial pass-by style fixes, and some people may like the fprintf
>> conversion while having different views on the other changes.
>
> I think he misunderstood.
>
> I told him that his previous patch had its own whitespace issues,
> ie, it _adds_ whitespace errors.
>
> In another reply I told him it is usually a good idea to fix style
> issues in the code the patch touches -- this means, close to, if a
> line being changed for something else has an unrelated style issue,
> it should be fixed too.  (An example was space between function name
> and its arguments in fprintf argument list, while converting that
> fprintf to error_report).
>
> But it looks like he took this advise in much more broad way and
> fixed _all_ whitespace/style issues in the files he touches.  Oh
> well.. :)
>
> And yes, definitely, please don't mix it like this.  Usually, these
> code style issues should not be touched by its own, only if you
> change nearby code.
>
> Thanks,
>
> /mjt



[Qemu-devel] [PATCH v4] block: replace fprintf(stderr, ...) with error_report()

2014-05-25 Thread Le Tan
Replace fprintf(stderr,...) with error_report() in files block/*, block.c,
block-migration.c and blockdev.c. The trailing "\n"s of the @fmt argument
have been removed because @fmt of error_report() should not contain newline.

Signed-off-by: Le Tan 
---
 block-migration.c  |6 +--
 block.c|4 +-
 block/qcow2-refcount.c |  115 +-
 block/qcow2.c  |   16 +++---
 block/raw-posix.c  |8 ++-
 block/raw-win32.c  |6 +--
 block/ssh.c|2 +-
 block/vdi.c|   14 +++---
 block/vmdk.c   |   15 +++---
 block/vpc.c|4 +-
 block/vvfat.c  |  129 
 blockdev.c |6 +--
 12 files changed, 159 insertions(+), 166 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index 56951e0..9cab084 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -790,8 +790,8 @@ static int block_load(QEMUFile *f, void *opaque, int 
version_id)
 
 bs = bdrv_find(device_name);
 if (!bs) {
-fprintf(stderr, "Error unknown block device %s\n",
-device_name);
+error_report("Error unknown block device %s",
+ device_name);
 return -EINVAL;
 }
 
@@ -833,7 +833,7 @@ static int block_load(QEMUFile *f, void *opaque, int 
version_id)
(addr == 100) ? '\n' : '\r');
 fflush(stdout);
 } else if (!(flags & BLK_MIG_FLAG_EOS)) {
-fprintf(stderr, "Unknown block migration flags: %#x\n", flags);
+error_report("Unknown block migration flags: %#x", flags);
 return -EINVAL;
 }
 ret = qemu_file_get_error(f);
diff --git a/block.c b/block.c
index c90c71a..725c46a 100644
--- a/block.c
+++ b/block.c
@@ -2698,8 +2698,8 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t 
offset,
  * if it has been enabled.
  */
 if (bs->io_limits_enabled) {
-fprintf(stderr, "Disabling I/O throttling on '%s' due "
-"to synchronous I/O.\n", bdrv_get_device_name(bs));
+error_report("Disabling I/O throttling on '%s' due "
+ "to synchronous I/O.\n", bdrv_get_device_name(bs));
 bdrv_io_limits_disable(bs);
 }
 
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 9507aef..9f6d1e6 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -795,7 +795,7 @@ void qcow2_free_clusters(BlockDriverState *bs,
 BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
 ret = update_refcount(bs, offset, size, -1, type);
 if (ret < 0) {
-fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
+error_report("qcow2_free_clusters failed: %s", strerror(-ret));
 /* TODO Remember the clusters to free them later and avoid leaking */
 }
 }
@@ -1040,14 +1040,15 @@ static void inc_refcounts(BlockDriverState *bs,
 cluster_offset += s->cluster_size) {
 k = cluster_offset >> s->cluster_bits;
 if (k >= refcount_table_size) {
-fprintf(stderr, "Warning: cluster offset=0x%" PRIx64 " is after "
-"the end of the image file, can't properly check refcounts.\n",
-cluster_offset);
+error_report("Warning: cluster offset=0x%" PRIx64 " is after the "
+ "end of the image file, "
+ "can't properly check refcounts.",
+ cluster_offset);
 res->check_errors++;
 } else {
 if (++refcount_table[k] == 0) {
-fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
-"\n", cluster_offset);
+error_report("ERROR: overflow cluster offset=0x%" PRIx64,
+ cluster_offset);
 res->corruptions++;
 }
 }
@@ -1091,9 +1092,9 @@ static int check_refcounts_l2(BlockDriverState *bs, 
BdrvCheckResult *res,
 case QCOW2_CLUSTER_COMPRESSED:
 /* Compressed clusters don't have QCOW_OFLAG_COPIED */
 if (l2_entry & QCOW_OFLAG_COPIED) {
-fprintf(stderr, "ERROR: cluster %" PRId64 ": "
-"copied flag must never be set for compressed "
-"clusters\n", l2_entry >> s->cluster_bits);
+error_report("ERROR: cluster %" PRId64 ": "
+ "copied flag must never be set for compresse

[Qemu-devel] About AddressSpace in intel-iommu emulation

2014-06-26 Thread Le Tan
Hi Paolo,
I am adding intel-iommu emulation to q35 for the GSoC project. I am
confused about AddressSpace and I believe that you can help me. :)
1. For intel-iommu emulation, I have to read the translation
structures from guest memory, that is, the guest will prepare some
tables in memory and write the physical address of them to a register
of intel-iommu, and I need to access those structures. I use
dma_memory_read(&address_space_memory,...) to do this. Is that right?
I am not sure that whether accesses to address_space_memory will be
translated through IOMMU. I think the answer is not, because I see
that cpu_physical_memory_read() also use address_space_memory as
AddressSpace.

2. In my opinion, I have to init a AddressSpace and link it with my
IOMMU MemoryRegion, then the bus uses this AddressSpace to translate
the accesses. Is that right? For q35, how can I register my IOMMU
MemoryRegion to the bus? I see that there is function
pci_setup_iommu() that links a AddressSpace to the bus to translate
accesses to PCI into system memory. Is that related? I think q35
should maintain a bus AddressSpace, but I can't find it.
What do you think?

Thanks very much!

Regards,
Le Tan



Re: [Qemu-devel] About AddressSpace in intel-iommu emulation

2014-06-26 Thread Le Tan
2014-06-26 22:05 GMT+08:00 Paolo Bonzini :
> Il 26/06/2014 16:01, Le Tan ha scritto:
>
>> Hi Paolo,
>> I am adding intel-iommu emulation to q35 for the GSoC project. I am
>> confused about AddressSpace and I believe that you can help me. :)
>> 1. For intel-iommu emulation, I have to read the translation
>> structures from guest memory, that is, the guest will prepare some
>> tables in memory and write the physical address of them to a register
>> of intel-iommu, and I need to access those structures. I use
>> dma_memory_read(&address_space_memory,...) to do this. Is that right?
>> I am not sure that whether accesses to address_space_memory will be
>> translated through IOMMU. I think the answer is not, because I see
>> that cpu_physical_memory_read() also use address_space_memory as
>> AddressSpace.
>
>
> Correct.
>
>
>> 2. In my opinion, I have to init a AddressSpace and link it with my
>> IOMMU MemoryRegion, then the bus uses this AddressSpace to translate
>> the accesses. Is that right? For q35, how can I register my IOMMU
>> MemoryRegion to the bus? I see that there is function
>> pci_setup_iommu() that links a AddressSpace to the bus to translate
>> accesses to PCI into system memory. Is that related? I think q35
>> should maintain a bus AddressSpace, but I can't find it.
>> What do you think?
>
>
> Right now, the q35 PCI host does not define an iommu_fn, so the default DMA
> address space is used by pci_device_iommu_address_space.  This is just
> address_space_memory.
>
> The iommu_fn is set with pci_setup_iommu.  Commit ae74bbe (apb: implement
> IOMMU translation for PCI host bridge, 2014-05-28) provides an example of
> how to prepare an IOMMU memory region, add it to an address space, and
> return that address space from an iommu_fn.
>
> Thanks,

Thanks very much! I saw the example of apb and typoon before and was
confused that why there is no such thing in q35. And it is clear now.
Thanks very much! :)
ps: Send this email again because I forgot to reply to all in last
email. Sorry. :)

Regards,
Le

> Paolo
>
>
>> Thanks very much!
>>
>> Regards,
>> Le Tan
>>
>



Re: [Qemu-devel] About AddressSpace in intel-iommu emulation

2014-06-26 Thread Le Tan
2014-06-26 22:05 GMT+08:00 Paolo Bonzini :
> Il 26/06/2014 16:01, Le Tan ha scritto:
>
>> Hi Paolo,
>> I am adding intel-iommu emulation to q35 for the GSoC project. I am
>> confused about AddressSpace and I believe that you can help me. :)
>> 1. For intel-iommu emulation, I have to read the translation
>> structures from guest memory, that is, the guest will prepare some
>> tables in memory and write the physical address of them to a register
>> of intel-iommu, and I need to access those structures. I use
>> dma_memory_read(&address_space_memory,...) to do this. Is that right?
>> I am not sure that whether accesses to address_space_memory will be
>> translated through IOMMU. I think the answer is not, because I see
>> that cpu_physical_memory_read() also use address_space_memory as
>> AddressSpace.
>
>
> Correct.
>
>
>> 2. In my opinion, I have to init a AddressSpace and link it with my
>> IOMMU MemoryRegion, then the bus uses this AddressSpace to translate
>> the accesses. Is that right? For q35, how can I register my IOMMU
>> MemoryRegion to the bus? I see that there is function
>> pci_setup_iommu() that links a AddressSpace to the bus to translate
>> accesses to PCI into system memory. Is that related? I think q35
>> should maintain a bus AddressSpace, but I can't find it.
>> What do you think?
>
>
> Right now, the q35 PCI host does not define an iommu_fn, so the default DMA
> address space is used by pci_device_iommu_address_space.  This is just
> address_space_memory.
>
> The iommu_fn is set with pci_setup_iommu.  Commit ae74bbe (apb: implement
> IOMMU translation for PCI host bridge, 2014-05-28) provides an example of
> how to prepare an IOMMU memory region, add it to an address space, and
> return that address space from an iommu_fn.

Hi Paolo,
I have added the address space to q35 and the translate function of
intel_iommu is called. :) However, I still have some questions here.
1. In struct IOMMUTLBEntry, I think the addr_mask field should be the
mask of the page offset, right? But I see different usages of this
field. In spapr_tce_translate_iommu(), the addr_mask field is assigned
with the mask of the page offset. However, in pbm_translate_iommu(),
in the passthrough case, the addr_mask field seems to be assigned the
mask of the page number. Is there any problem here?
2. For q35, how to identify origination of DMA requests? The VT-d
manual says we should use source-id(for PCI-Express devices, it is
requester identifier) to map devices to domains. What is the related
part in QEMU? Where can I get the source-id of a DMA request?

Thanks very much!

Le


> Thanks,
>
> Paolo
>
>
>> Thanks very much!
>>
>> Regards,
>> Le Tan
>>
>



Re: [Qemu-devel] About AddressSpace in intel-iommu emulation

2014-06-26 Thread Le Tan
2014-06-27 12:55 GMT+08:00 Paolo Bonzini :
> Il 27/06/2014 04:08, Le Tan ha scritto:
>
>> 1. In struct IOMMUTLBEntry, I think the addr_mask field should be the
>> mask of the page offset, right? But I see different usages of this
>> field. In spapr_tce_translate_iommu(), the addr_mask field is assigned
>> with the mask of the page offset. However, in pbm_translate_iommu(),
>> in the passthrough case, the addr_mask field seems to be assigned the
>> mask of the page number. Is there any problem here?
>
>
> The intended usage is the one of spapr_tce_translate_iommu().  In practice
> it doesn't matter, both work.
>
>
>> 2. For q35, how to identify origination of DMA requests? The VT-d
>> manual says we should use source-id(for PCI-Express devices, it is
>> requester identifier) to map devices to domains. What is the related
>> part in QEMU? Where can I get the source-id of a DMA request?
>
>
> You need to create a different AddressSpace for each PCI bus or device.

How to create a different AddressSpace for each device? I thought a
AddressSpace just belongs to a PCI bus before. The paging structures
for different functions of the same device can also be different, too.
So maybe we should create a different AddressSpace for each function?
How to achieve it? Could you give me some more hints or is there any
existing example in QEMU?
Thanks very much!

Le

> Paolo



Re: [Qemu-devel] About AddressSpace in intel-iommu emulation

2014-06-27 Thread Le Tan
2014-06-27 17:55 GMT+08:00 Jan Kiszka :
> On 2014-06-27 07:46, Le Tan wrote:
>> 2014-06-27 12:55 GMT+08:00 Paolo Bonzini :
>>> Il 27/06/2014 04:08, Le Tan ha scritto:
>>>
>>>> 1. In struct IOMMUTLBEntry, I think the addr_mask field should be the
>>>> mask of the page offset, right? But I see different usages of this
>>>> field. In spapr_tce_translate_iommu(), the addr_mask field is assigned
>>>> with the mask of the page offset. However, in pbm_translate_iommu(),
>>>> in the passthrough case, the addr_mask field seems to be assigned the
>>>> mask of the page number. Is there any problem here?
>>>
>>>
>>> The intended usage is the one of spapr_tce_translate_iommu().  In practice
>>> it doesn't matter, both work.
>>>
>>>
>>>> 2. For q35, how to identify origination of DMA requests? The VT-d
>>>> manual says we should use source-id(for PCI-Express devices, it is
>>>> requester identifier) to map devices to domains. What is the related
>>>> part in QEMU? Where can I get the source-id of a DMA request?
>>>
>>>
>>> You need to create a different AddressSpace for each PCI bus or device.
>>
>> How to create a different AddressSpace for each device? I thought a
>> AddressSpace just belongs to a PCI bus before. The paging structures
>> for different functions of the same device can also be different, too.
>> So maybe we should create a different AddressSpace for each function?
>> How to achieve it? Could you give me some more hints or is there any
>> existing example in QEMU?
>
> I would suggest to study the apb IOMMU implementation Paolo referenced
> and the PCI layer functions used by that code. Specifically,
> pci_setup_iommu takes a callback that is supposed to return an address
> space to be used for a particular device. For apb, it's the same for all
> devices on a bus, but that's not required...

Yeah, I notice that the third parameter passed to bus->iommu_fn() is
the key. That is the dev->devfn, which is the combination of #device
and #function. I was so careless that I didn't noticed this field. I
think I am getting close to the answer. I will dive into this later.
:)
Thanks very much!

Regards,
Le

>
> Jan
>
>



[Qemu-devel] Why devfn will be -1

2014-06-30 Thread Le Tan
Hi Jan,
I use pci_setup_iommu() to setup a PCIIOMMUFunc for the q35 pci bus.
In the iommu_fn, I print out the devfn parameter and find out that it
sometimes will be -1. So what does it mean?
The detail code is here:

In mch_init() function, I write like this:
PCIBus *pci_bus = PCI_BUS(qdev_get_parent_bus(DEVICE(mch)));
pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu);

And in q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn), I
print out the devfn parameter, sometimes it will be -1.

Thanks very much!

Regards,
Le



Re: [Qemu-devel] Why devfn will be -1

2014-07-01 Thread Le Tan
Hi Jan,

2014-07-01 15:34 GMT+08:00 Jan Kiszka :
> Hi Le,
>
> On 2014-07-01 04:34, Le Tan wrote:
>> Hi Jan,
>> I use pci_setup_iommu() to setup a PCIIOMMUFunc for the q35 pci bus.
>> In the iommu_fn, I print out the devfn parameter and find out that it
>> sometimes will be -1. So what does it mean?
>> The detail code is here:
>>
>> In mch_init() function, I write like this:
>> PCIBus *pci_bus = PCI_BUS(qdev_get_parent_bus(DEVICE(mch)));
>> pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu);
>>
>> And in q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn), I
>> print out the devfn parameter, sometimes it will be -1.
>
> Hmm, I have no idea about the reason and would suggest to set a
> conditional breakpoint on this function, then print the backtrace to see
> where this comes from and analyze the device structure from where that
> -1 was most probably taken.

I have set a breakpoint here and cases whose devfn is -1 are different
from each run. ICH9 LPC, cirrus-vga and e1000, these three devices'
initialization will route to this problem. Sometimes the
initialization of ICH9 LPC will be normal. The backtrace are given
below. I trace to the function do_pci_register_device() and see that
the dev->devfn is initialized after the
pci_device_iommu_address_space() is called. So I think this is so
strange. Is the devfn parameter to the q35_host_dma_iommu() not
reliable and usable?
Thanks very much!

#0  q35_host_dma_iommu (bus=0x5629d280, opaque=0x566a4df0, devfn=-1)
at /home/tamlok/qemu-git/hw/pci-host/q35.c:351
#1  0x558524ef in pci_device_iommu_address_space
(dev=0x566a7e20) at /home/tamlok/qemu-git/hw/pci/pci.c:2284
#2  0x5584e5b4 in do_pci_register_device
(pci_dev=0x566a7e20, bus=0x5629d280,
name=0x5624a250 "ICH9 LPC", devfn=248) at
/home/tamlok/qemu-git/hw/pci/pci.c:830
#3  0x55850bac in pci_qdev_init (qdev=0x566a7e20) at
/home/tamlok/qemu-git/hw/pci/pci.c:1759
#4  0x557c9359 in device_realize (dev=0x566a7e20,
errp=0x7fffdd50)
at /home/tamlok/qemu-git/hw/core/qdev.c:182
#5  0x557cb15c in device_set_realized (obj=0x566a7e20,
value=true, errp=0x7fffded0)
at /home/tamlok/qemu-git/hw/core/qdev.c:809
#6  0x558d3234 in property_set_bool (obj=0x566a7e20,
v=0x5668bec0, opaque=0x5662b360,
name=0x559c2a86 "realized", errp=0x7fffded0) at
/home/tamlok/qemu-git/qom/object.c:1421
#7  0x558d1a68 in object_property_set (obj=0x566a7e20,
v=0x5668bec0, name=0x559c2a86 "realized",
errp=0x7fffded0) at /home/tamlok/qemu-git/qom/object.c:819
#8  0x558d3a26 in object_property_set_qobject
(obj=0x566a7e20, value=0x566acfd0,
name=0x559c2a86 "realized", errp=0x7fffded0) at
/home/tamlok/qemu-git/qom/qom-qobject.c:24
#9  0x558d1d65 in object_property_set_bool
(obj=0x566a7e20, value=true, name=0x559c2a86 "realized",
errp=0x7fffded0) at /home/tamlok/qemu-git/qom/object.c:883
#10 0x557c9295 in qdev_init (dev=0x566a7e20) at
/home/tamlok/qemu-git/hw/core/qdev.c:167
#11 0x557c98a3 in qdev_init_nofail (dev=0x566a7e20) at
/home/tamlok/qemu-git/hw/core/qdev.c:290
#12 0x55850d6f in pci_create_simple_multifunction
(bus=0x5629d280, devfn=248, multifunction=true,
name=0x559a3783 "ICH9 LPC") at /home/tamlok/qemu-git/hw/pci/pci.c:1800
#13 0x556943ce in pc_q35_init (machine=0x56281c00) at
/home/tamlok/qemu-git/hw/i386/pc_q35.c:176
#14 0x5573c72f in main (argc=19, argv=0x7fffe4d8,
envp=0x7fffe578) at /home/tamlok/qemu-git/vl.c:4441

#0  q35_host_dma_iommu (bus=0x5629d280, opaque=0x566a4df0, devfn=-1)
at /home/tamlok/qemu-git/hw/pci-host/q35.c:351
#1  0x558524ef in pci_device_iommu_address_space
(dev=0x566ef530) at /home/tamlok/qemu-git/hw/pci/pci.c:2284
#2  0x5584e5b4 in do_pci_register_device
(pci_dev=0x566ef530, bus=0x5629d280,
name=0x562551a0 "cirrus-vga", devfn=8) at
/home/tamlok/qemu-git/hw/pci/pci.c:830
#3  0x55850bac in pci_qdev_init (qdev=0x566ef530) at
/home/tamlok/qemu-git/hw/pci/pci.c:1759
#4  0x557c9359 in device_realize (dev=0x566ef530,
errp=0x7fffdca0)
at /home/tamlok/qemu-git/hw/core/qdev.c:182
#5  0x557cb15c in device_set_realized (obj=0x566ef530,
value=true, errp=0x7fffde20)
at /home/tamlok/qemu-git/hw/core/qdev.c:809
#6  0x558d3234 in property_set_bool (obj=0x566ef530,
v=0x5666e300, opaque=0x56856f90,
name=0x559c2a86 "realized", errp=0x7fffde20) at
/home/tamlok/qemu-git/qom/object.c:1421
#7  0x558d1a68 in object_property_set (obj=0x566ef530,
v=0x5666e300, name=0x559c2a86 "realized",
errp=0x7fffde2

Re: [Qemu-devel] Why devfn will be -1

2014-07-01 Thread Le Tan
2014-07-01 20:52 GMT+08:00 Le Tan :
> Hi Jan,
>
> 2014-07-01 15:34 GMT+08:00 Jan Kiszka :
>> Hi Le,
>>
>> On 2014-07-01 04:34, Le Tan wrote:
>>> Hi Jan,
>>> I use pci_setup_iommu() to setup a PCIIOMMUFunc for the q35 pci bus.
>>> In the iommu_fn, I print out the devfn parameter and find out that it
>>> sometimes will be -1. So what does it mean?
>>> The detail code is here:
>>>
>>> In mch_init() function, I write like this:
>>> PCIBus *pci_bus = PCI_BUS(qdev_get_parent_bus(DEVICE(mch)));
>>> pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu);
>>>
>>> And in q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn), I
>>> print out the devfn parameter, sometimes it will be -1.
>>
>> Hmm, I have no idea about the reason and would suggest to set a
>> conditional breakpoint on this function, then print the backtrace to see
>> where this comes from and analyze the device structure from where that
>> -1 was most probably taken.

I think maybe this is a bug? In the function do_pci_register_device(),
maybe these two sentence should be reorder?
dma_as = pci_device_iommu_address_space(pci_dev);
pci_dev->devfn = devfn;

> I have set a breakpoint here and cases whose devfn is -1 are different
> from each run. ICH9 LPC, cirrus-vga and e1000, these three devices'
> initialization will route to this problem. Sometimes the
> initialization of ICH9 LPC will be normal. The backtrace are given
> below. I trace to the function do_pci_register_device() and see that
> the dev->devfn is initialized after the
> pci_device_iommu_address_space() is called. So I think this is so
> strange. Is the devfn parameter to the q35_host_dma_iommu() not
> reliable and usable?
> Thanks very much!
>
> #0  q35_host_dma_iommu (bus=0x5629d280, opaque=0x566a4df0, devfn=-1)
> at /home/tamlok/qemu-git/hw/pci-host/q35.c:351
> #1  0x558524ef in pci_device_iommu_address_space
> (dev=0x566a7e20) at /home/tamlok/qemu-git/hw/pci/pci.c:2284
> #2  0x5584e5b4 in do_pci_register_device
> (pci_dev=0x566a7e20, bus=0x5629d280,
> name=0x5624a250 "ICH9 LPC", devfn=248) at
> /home/tamlok/qemu-git/hw/pci/pci.c:830
> #3  0x55850bac in pci_qdev_init (qdev=0x566a7e20) at
> /home/tamlok/qemu-git/hw/pci/pci.c:1759
> #4  0x557c9359 in device_realize (dev=0x566a7e20,
> errp=0x7fffdd50)
> at /home/tamlok/qemu-git/hw/core/qdev.c:182
> #5  0x557cb15c in device_set_realized (obj=0x566a7e20,
> value=true, errp=0x7fffded0)
> at /home/tamlok/qemu-git/hw/core/qdev.c:809
> #6  0x558d3234 in property_set_bool (obj=0x566a7e20,
> v=0x5668bec0, opaque=0x5662b360,
> name=0x559c2a86 "realized", errp=0x7fffded0) at
> /home/tamlok/qemu-git/qom/object.c:1421
> #7  0x558d1a68 in object_property_set (obj=0x566a7e20,
> v=0x5668bec0, name=0x559c2a86 "realized",
> errp=0x7fffded0) at /home/tamlok/qemu-git/qom/object.c:819
> #8  0x558d3a26 in object_property_set_qobject
> (obj=0x566a7e20, value=0x566acfd0,
> name=0x559c2a86 "realized", errp=0x7fffded0) at
> /home/tamlok/qemu-git/qom/qom-qobject.c:24
> #9  0x558d1d65 in object_property_set_bool
> (obj=0x566a7e20, value=true, name=0x559c2a86 "realized",
> errp=0x7fffded0) at /home/tamlok/qemu-git/qom/object.c:883
> #10 0x557c9295 in qdev_init (dev=0x566a7e20) at
> /home/tamlok/qemu-git/hw/core/qdev.c:167
> #11 0x557c98a3 in qdev_init_nofail (dev=0x566a7e20) at
> /home/tamlok/qemu-git/hw/core/qdev.c:290
> #12 0x55850d6f in pci_create_simple_multifunction
> (bus=0x5629d280, devfn=248, multifunction=true,
> name=0x559a3783 "ICH9 LPC") at /home/tamlok/qemu-git/hw/pci/pci.c:1800
> #13 0x556943ce in pc_q35_init (machine=0x56281c00) at
> /home/tamlok/qemu-git/hw/i386/pc_q35.c:176
> #14 0x5573c72f in main (argc=19, argv=0x7fffe4d8,
> envp=0x7fffe578) at /home/tamlok/qemu-git/vl.c:4441
>
> #0  q35_host_dma_iommu (bus=0x5629d280, opaque=0x566a4df0, devfn=-1)
> at /home/tamlok/qemu-git/hw/pci-host/q35.c:351
> #1  0x558524ef in pci_device_iommu_address_space
> (dev=0x566ef530) at /home/tamlok/qemu-git/hw/pci/pci.c:2284
> #2  0x5584e5b4 in do_pci_register_device
> (pci_dev=0x566ef530, bus=0x5629d280,
> name=0x562551a0 "cirrus-vga", devfn=8) at
> /home/tamlok/qemu-git/hw/pci/pci.c:830
> #3  0x55850bac in pci_qdev_init (qdev=0x566ef530) at
> /home/tamlok/qemu-git/hw/pci/pci.c:1759
> 

Re: [Qemu-devel] Why devfn will be -1

2014-07-01 Thread Le Tan
2014-07-01 20:56 GMT+08:00 Jan Kiszka :
> On 2014-07-01 14:55, Le Tan wrote:
>> 2014-07-01 20:52 GMT+08:00 Le Tan :
>>> Hi Jan,
>>>
>>> 2014-07-01 15:34 GMT+08:00 Jan Kiszka :
>>>> Hi Le,
>>>>
>>>> On 2014-07-01 04:34, Le Tan wrote:
>>>>> Hi Jan,
>>>>> I use pci_setup_iommu() to setup a PCIIOMMUFunc for the q35 pci bus.
>>>>> In the iommu_fn, I print out the devfn parameter and find out that it
>>>>> sometimes will be -1. So what does it mean?
>>>>> The detail code is here:
>>>>>
>>>>> In mch_init() function, I write like this:
>>>>> PCIBus *pci_bus = PCI_BUS(qdev_get_parent_bus(DEVICE(mch)));
>>>>> pci_setup_iommu(pci_bus, q35_host_dma_iommu, mch->iommu);
>>>>>
>>>>> And in q35_host_dma_iommu(PCIBus *bus, void *opaque, int devfn), I
>>>>> print out the devfn parameter, sometimes it will be -1.
>>>>
>>>> Hmm, I have no idea about the reason and would suggest to set a
>>>> conditional breakpoint on this function, then print the backtrace to see
>>>> where this comes from and analyze the device structure from where that
>>>> -1 was most probably taken.
>>
>> I think maybe this is a bug? In the function do_pci_register_device(),
>> maybe these two sentence should be reorder?
>> dma_as = pci_device_iommu_address_space(pci_dev);
>> pci_dev->devfn = devfn;
>
> Looks like. Give it a try, then possibly send a patch :)
I reorder these two sentences and get the print log like this:
vtd bus 0 slot 31 func 0 devfn 248
vtd bus 0 slot 31 func 2 devfn 250
vtd bus 0 slot 31 func 3 devfn 251
vtd bus 0 slot 1 func 0 devfn 8
vtd bus 0 slot 2 func 0 devfn 16

The "info pci" output is here:
(qemu) info pci
  Bus  0, device   0, function 0:
Host bridge: PCI device 8086:29c0
  id ""
  Bus  0, device   1, function 0:
VGA controller: PCI device 1013:00b8
  BAR0: 32 bit prefetchable memory at 0xfc00 [0xfdff].
  BAR1: 32 bit memory at 0xfebf [0xfebf0fff].
  BAR6: 32 bit memory at 0x [0xfffe].
  id ""
  Bus  0, device   2, function 0:
Ethernet controller: PCI device 8086:100e
  IRQ 11.
  BAR0: 32 bit memory at 0xfebc [0xfebd].
  BAR1: I/O at 0xc000 [0xc03f].
  BAR6: 32 bit memory at 0x [0x0003fffe].
  id ""
  Bus  0, device  31, function 0:
ISA bridge: PCI device 8086:2918
  id ""
  Bus  0, device  31, function 2:
SATA controller: PCI device 8086:2922
  IRQ 10.
  BAR4: I/O at 0xc080 [0xc09f].
  BAR5: 32 bit memory at 0xfebf1000 [0xfebf1fff].
  id ""
  Bus  0, device  31, function 3:
SMBus: PCI device 8086:2930
  IRQ 10.
  BAR4: I/O at 0x0700 [0x073f].
  id ""

So maybe it is all right now? And I will go on the vtd emulation.:)
Thanks very much!

> Jan
>



[Qemu-devel] [PATCH] pci: assign devfn to pci_dev before calling pci_device_iommu_address_space()

2014-07-01 Thread Le Tan
In function do_pci_register_device() in file hw/pci/pci.c, move the assignment
of pci_dev->devfn to the position before the call to
pci_device_iommu_address_space(pci_dev) which will use the value of
pci_dev->devfn.

Signed-off-by: Le Tan 
---
 hw/pci/pci.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 17ed510..351d320 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -827,6 +827,7 @@ static PCIDevice *do_pci_register_device(PCIDevice 
*pci_dev, PCIBus *bus,
 }
 
 pci_dev->bus = bus;
+pci_dev->devfn = devfn;
 dma_as = pci_device_iommu_address_space(pci_dev);
 
 memory_region_init_alias(&pci_dev->bus_master_enable_region,
@@ -836,7 +837,6 @@ static PCIDevice *do_pci_register_device(PCIDevice 
*pci_dev, PCIBus *bus,
 address_space_init(&pci_dev->bus_master_as, 
&pci_dev->bus_master_enable_region,
name);
 
-pci_dev->devfn = devfn;
 pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
 pci_dev->irq_state = 0;
 pci_config_alloc(pci_dev);
-- 
1.7.9.5




[Qemu-devel] [PATCH] ahci: map memory via device's address space instead of address_space_memory

2014-07-03 Thread Le Tan
In map_page() in hw/ide/ahci.c, replace cpu_physical_memory_map() and
cpu_physical_memory_unmap() with dma_memory_map() and dma_memory_unmap(),
because ahci devices should not access memory directly but via their address
space. Add an AddressSpace parameter to map_page(). In order to call
map_page(), we should pass the AHCIState.as as the AddressSpace argument.

Signed-off-by: Le Tan 
---
 hw/ide/ahci.c |   21 +++--
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 9bae22e..7bb0a03 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -175,17 +175,18 @@ static void ahci_trigger_irq(AHCIState *s, AHCIDevice *d,
 ahci_check_irq(s);
 }
 
-static void map_page(uint8_t **ptr, uint64_t addr, uint32_t wanted)
+static void map_page(AddressSpace *as, uint8_t **ptr, uint64_t addr,
+ uint32_t wanted)
 {
 hwaddr len = wanted;
 
 if (*ptr) {
-cpu_physical_memory_unmap(*ptr, len, 1, len);
+dma_memory_unmap(as, *ptr, len, DMA_DIRECTION_FROM_DEVICE, len);
 }
 
-*ptr = cpu_physical_memory_map(addr, &len, 1);
+*ptr = dma_memory_map(as, addr, &len, DMA_DIRECTION_FROM_DEVICE);
 if (len < wanted) {
-cpu_physical_memory_unmap(*ptr, len, 1, len);
+dma_memory_unmap(as, *ptr, len, DMA_DIRECTION_FROM_DEVICE, len);
 *ptr = NULL;
 }
 }
@@ -198,24 +199,24 @@ static void  ahci_port_write(AHCIState *s, int port, int 
offset, uint32_t val)
 switch (offset) {
 case PORT_LST_ADDR:
 pr->lst_addr = val;
-map_page(&s->dev[port].lst,
+map_page(s->as, &s->dev[port].lst,
  ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024);
 s->dev[port].cur_cmd = NULL;
 break;
 case PORT_LST_ADDR_HI:
 pr->lst_addr_hi = val;
-map_page(&s->dev[port].lst,
+map_page(s->as, &s->dev[port].lst,
  ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024);
 s->dev[port].cur_cmd = NULL;
 break;
 case PORT_FIS_ADDR:
 pr->fis_addr = val;
-map_page(&s->dev[port].res_fis,
+map_page(s->as, &s->dev[port].res_fis,
  ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256);
 break;
 case PORT_FIS_ADDR_HI:
 pr->fis_addr_hi = val;
-map_page(&s->dev[port].res_fis,
+map_page(s->as, &s->dev[port].res_fis,
  ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256);
 break;
 case PORT_IRQ_STAT:
@@ -1260,9 +1261,9 @@ static int ahci_state_post_load(void *opaque, int 
version_id)
 ad = &s->dev[i];
 AHCIPortRegs *pr = &ad->port_regs;
 
-map_page(&ad->lst,
+map_page(s->as, &ad->lst,
  ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024);
-map_page(&ad->res_fis,
+map_page(s->as, &ad->res_fis,
  ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256);
 /*
  * All pending i/o should be flushed out on a migrate. However,
-- 
1.7.9.5




[Qemu-devel] How to decide the value of Host Address Width in ACPI DMAR table

2014-07-05 Thread Le Tan
Hi,
I am doing Intel IOMMU emulation for q35. I need to add a DMAR table
to ACPI. There is one field in the DMAR table called "Host Address
Width", which indicates the maximum DMA physical addressability
supported by this platform. I am not sure what this value should be
and how to get this value. What is the best way to decide the value of
HAW?
Any suggestion is appreciated. :)
Thanks very much!

Regards,
Le Tan



[Qemu-devel] When QEMU is emulating DMA, can other vcpus read/write the same memory region?

2014-04-14 Thread Le Tan
Hi, I have been confused by this question for a long time, both in
hardware and software.
First, in the real x86 hardware architectures, while the DMA is
operating, can other CPUs access the same memory region? Or the DMA
controller will hold the bus and the memory exclusively and CPUs have
to wait? Of course, we assume that the operating system is too stupid
to provide the mutex in software.
Second, if the CPUs have to wait, then how about in QEMU? When QEMU is
emulating the DMA, can other vcpus access the memory (or exactly, the
same memory region)? If not, how does QEMU achieve this? That is, how
does QEMU prevent other vcpus to access the memory? I have read
through the part of the emulation of DMA, but I didn't see anything
related to this.
Thanks very much!

--Le Tan



Re: [Qemu-devel] [GSoC] Wanted: small warmup tasks

2014-05-01 Thread Le Tan
2014-04-24 19:38 GMT+08:00 Andreas Färber :
> -BEGIN PGP SIGNED MESSAGE-
> Hash: SHA1
>
> Am 24.04.2014 08:19, schrieb Jan Kiszka:
>> On 2014-04-23 11:25, Stefan Hajnoczi wrote:
>>> Dear QEMU, Libvirt, and KVM communities, We are participating in
>>> Google Summer of Code 2014 (http://google-melange.com/) and
>>> Outreach Program for Women (http://opw.gnome.org/).  Both
>>> programs fund candidates to work on our open source projects for
>>> 12 weeks this summer.
>>
>> To follow up on this: I'm currently looking for optional tiny
>> "warmup" tasks for our QEMU students during the bonding period
>> (till May 18). If you have any trivial issues or extensions in mind
>> that someone could address within a few days or even hours, that
>> would be perfect. It could even be something like "reformat the
>> printing of these messages" or so.
>
> Replacing some more fprintf(stderr, "foo\n") with error_report("foo")
> comes to mind. :)

Hi,
Having consulted my mentor, I want to pick up this as the warm-up
task.:) I will dig into it in the following
days and learn how to contribute in the community.
Thanks!

--Le Tan



[Qemu-devel] VNC viewer displays nothing with -enable-kvm but works fine without -enable-kvm

2014-03-14 Thread Le Tan
Hi, I used qemu-kvm-1.2.0 before. I can run qemu with kvm well. Two days
ago, I updated qemu to qemu-1.6.2, and start the qemu with the command:
qemu-system-x86_64 -m 2048 -smp 4 -hda /home/tanle/study/new.img -nographic
-vnc 162.105.146.118:2
It works fine and slow. It is obviously not using the kvm to accelerate.
Then I use the command:
qemu-system-x86_64 -m 2048 -smp 4 -hda /home/tanle/study/new.img -nographic
-vnc 162.105.146.118:2 -enable-kvm
The VNC viewer can connect to the vm, but it is black and displays nothing.
What is wrong here?
My kernel version is 3.11.0. Will it be the problem of compatibility?
Thanks!

--Le Tan


[Qemu-devel] How to understand the coroutine context?

2014-03-17 Thread Le Tan
Hi, I am diving into the source code of qemu. I see the word
"coroutine" appears in so many places. I can't figure out what it
means. So, please, can anyone help me, telling me the mechanism or
semantic of "coroutine"? Thanks!

--Le Tan



Re: [Qemu-devel] How to understand the coroutine context?

2014-03-17 Thread Le Tan
Thanks, that is an excellent blog!

2014-03-18 12:04 GMT+08:00 Kashyap Chamarthy :
> On Tue, Mar 18, 2014 at 07:56:16AM +0800, Le Tan wrote:
>> Hi, I am diving into the source code of qemu. I see the word
>> "coroutine" appears in so many places. I can't figure out what it
>> means. So, please, can anyone help me, telling me the mechanism or
>> semantic of "coroutine"? Thanks!
>
> This might be of help --
> http://blog.vmsplice.net/2014/01/coroutines-in-qemu-basics.html
>
>
> --
> /kashyap



[Qemu-devel] About Disk I/O and DMA emulation in qemu-kvm

2014-03-19 Thread Le Tan
Hi, I am now diving into the disk I/O in kvm. But I find that things
are a little messy about this and I am stuck here. I configure the
virtual machine with a virtual IDE disk and DMA support. I need to get
every disk I/O issued by the virtual machine. I know that there are
struct IDEDevie, struct IDEBus, struct IDEDMA etc. in the qemu. But I
just can't figure out the logical relationships among these things and
the general code path of the disk I/O or DMA.
So, how is IDE disk or DMA emulated in qemu-kvm? What is the gerneral
path or logic when the vcpu issues a disk I/O? Can anyone give me the
complete gerneral description or idea of the disk I/O in qemu-kvm? Or
which part of the code should be taken in consideration? Maybe there
are some useful materials?
Maybe the question is a little complicated. Any help is appreciated! Thanks!

--Le Tan



Re: [Qemu-devel] VNC viewer displays nothing with -enable-kvm but works fine without -enable-kvm

2014-03-20 Thread Le Tan
I have tried 1.7.0 and it doesn't work either. And I find that it doesn't
work with QEMU whose version is above 1.3. Then I change the kernel from
3.11.0 to the latest release, and it works fine with QEMU 1.6.2 and
1.7.0!!! I know that qemu-kvm have been merged back into qemu after 1.2.2.
So maybe there is something different between before and after this? Maybe
it is the problem of compatibility between the old kernel and the new QEMU?
Thanks.

--Le Tan


2014-03-20 15:45 GMT+08:00 Stefan Hajnoczi :

> On Fri, Mar 14, 2014 at 04:02:58PM +0800, Le Tan wrote:
> > Hi, I used qemu-kvm-1.2.0 before. I can run qemu with kvm well. Two days
> > ago, I updated qemu to qemu-1.6.2, and start the qemu with the command:
> > qemu-system-x86_64 -m 2048 -smp 4 -hda /home/tanle/study/new.img
> -nographic
> > -vnc 162.105.146.118:2
> > It works fine and slow. It is obviously not using the kvm to accelerate.
> > Then I use the command:
> > qemu-system-x86_64 -m 2048 -smp 4 -hda /home/tanle/study/new.img
> -nographic
> > -vnc 162.105.146.118:2 -enable-kvm
> > The VNC viewer can connect to the vm, but it is black and displays
> nothing.
> > What is wrong here?
>
> Please use the latest upstream QEMU release (1.7.0) or contact your
> distro if you are using a packaged version of QEMU.
>
> Stefan
>


[Qemu-devel] Disk I/O in QEMU

2014-03-11 Thread Le Tan
Hi, I am now studying the disk I/O in QEMU, but I know little about QEMU. I
want to know how QEMU implements or emulates the disk I/O? Where to find
some materials about this? And if I can get all the disk I/O request from
the guest in QEMU? Thanks!


Re: [Qemu-devel] Disk I/O in QEMU

2014-03-12 Thread Le Tan
Hi Stefan,
Thanks for your help. I find that the source code of qemu-1.2.0 doesn't
have hw/block/. So I download the qemu-1.6.2. However it doesn't work. I
run the configure like this:
./configure --enable-kvm --x86_64-softmmu
then make and make install.
I can start the vm using this command:
qemu-system-x86_64 -m 1024 -smp 2 -hda my.img
But the qemu is not using the kvm to accelerate. Then I add the
"-enable-kvm" in the command. I connect to the vm via VNC. However, the
vnc-viewer is just black and displays nothing.
It used to work fine with qemu-1.2.0 (qemu-kvm). Of course, with qemu-kvm,
there is no need to add the "-enable-kvm" option.
So with qemu-1.6.2, did I miss any options?
Thanks!


2014-03-11 20:23 GMT+08:00 Stefan Hajnoczi :

> On Tue, Mar 11, 2014 at 03:08:52PM +0800, Le Tan wrote:
> > Hi, I am now studying the disk I/O in QEMU, but I know little about
> QEMU. I
> > want to know how QEMU implements or emulates the disk I/O? Where to find
> > some materials about this? And if I can get all the disk I/O request from
> > the guest in QEMU? Thanks!
>
> QEMU emulates hardware including IDE or SCSI controllers.  I/O requests
> go through QEMU.  Look at hw/block/.
>
> Stefan
>


Re: [Qemu-devel] Disk I/O in QEMU

2014-03-12 Thread Le Tan
I am sorry I didn't give the complete command I have executed. It is like
this:
qemu-system-x86_64 -m 1024 -smp 2 -hda /home/tanle/study/new.img -nographic
-vnc 192.168.146.118:2  -enable-kvm
If I delete the "-enable-kvm", it is ok and the system can boot up (just
slow). But with "-enable-kvm", the vnc viewer is just black, displaying
nothing.
So did I miss any options? Or there is something else wrong?
Thanks!


2014-03-12 21:13 GMT+08:00 Fam Zheng :

> On Wed, 03/12 20:27, Le Tan wrote:
> > Hi Stefan,
> > Thanks for your help. I find that the source code of qemu-1.2.0 doesn't
> > have hw/block/. So I download the qemu-1.6.2. However it doesn't work. I
> > run the configure like this:
> > ./configure --enable-kvm --x86_64-softmmu
> > then make and make install.
> > I can start the vm using this command:
> > qemu-system-x86_64 -m 1024 -smp 2 -hda my.img
> > But the qemu is not using the kvm to accelerate. Then I add the
> > "-enable-kvm" in the command. I connect to the vm via VNC. However, the
> > vnc-viewer is just black and displays nothing.
> > It used to work fine with qemu-1.2.0 (qemu-kvm). Of course, with
> qemu-kvm,
> > there is no need to add the "-enable-kvm" option.
> > So with qemu-1.6.2, did I miss any options?
> > Thanks!
> >
>
> But you didn't specify vnc as display in your command line?
>
> Try specify "-vnc :0" and connect with "vncviewer :0".
>
> Fam
>