Hi Joel,

Nice! I've been working on making the PIIX south bridge Xen agnostic, partly to 
show how Xen enablement in Q35 could look like. Not that I'd have any use case 
for it but great to see that you've actually done that!

I know you didn't intend to send this patch but I'll give you some early 
comments anyway.

Am 20. Juni 2023 17:24:35 UTC schrieb Joel Upham <jupham...@gmail.com>:
>---
> hw/acpi/ich9.c                |   22 +-
> hw/acpi/pcihp.c               |    6 +-
> hw/core/machine.c             |   19 +
> hw/i386/pc_piix.c             |    3 +-
> hw/i386/pc_q35.c              |   39 +-
> hw/i386/xen/xen-hvm.c         |    7 +-
> hw/i386/xen/xen_platform.c    |   19 +-
> hw/isa/lpc_ich9.c             |   53 +-
> hw/isa/piix3.c                |    2 +-
> hw/pci-host/q35.c             |   28 +-
> hw/pci/pci.c                  |   17 +
> hw/xen/xen-host-pci-device.c  |  106 +++-
> hw/xen/xen-host-pci-device.h  |    6 +-
> hw/xen/xen_pt.c               |   49 +-
> hw/xen/xen_pt.h               |   19 +-
> hw/xen/xen_pt_config_init.c   | 1103 ++++++++++++++++++++++++++++++---
> include/hw/acpi/ich9.h        |    1 +
> include/hw/acpi/pcihp.h       |    2 +
> include/hw/boards.h           |    1 +
> include/hw/i386/pc.h          |    3 +
> include/hw/pci-host/q35.h     |    4 +-
> include/hw/pci/pci.h          |    3 +
> include/hw/southbridge/ich9.h |    1 +
> include/hw/xen/xen.h          |    4 +-
> qemu-options.hx               |    1 +
> softmmu/datadir.c             |    1 -
> softmmu/qdev-monitor.c        |    3 +-
> stubs/xen-hw-stub.c           |    4 +-
> 28 files changed, 1395 insertions(+), 131 deletions(-)
>
>diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
>index 25e2c7243e..234706a191 100644
>--- a/hw/acpi/ich9.c
>+++ b/hw/acpi/ich9.c
>@@ -39,6 +39,8 @@
> #include "hw/southbridge/ich9.h"
> #include "hw/mem/pc-dimm.h"
> #include "hw/mem/nvdimm.h"
>+#include "hw/xen/xen.h"
>+#include "sysemu/xen.h"
> 
> //#define DEBUG
> 
>@@ -67,6 +69,10 @@ static void ich9_gpe_writeb(void *opaque, hwaddr addr, 
>uint64_t val,
>     ICH9LPCPMRegs *pm = opaque;
>     acpi_gpe_ioport_writeb(&pm->acpi_regs, addr, val);
>     acpi_update_sci(&pm->acpi_regs, pm->irq);
>+
>+    if (xen_enabled()) {
>+        acpi_pcihp_reset(&pm->acpi_pci_hotplug);
>+    }
> }
> 
> static const MemoryRegionOps ich9_gpe_ops = {
>@@ -137,7 +143,8 @@ static int ich9_pm_post_load(void *opaque, int version_id)
> {
>     ICH9LPCPMRegs *pm = opaque;
>     uint32_t pm_io_base = pm->pm_io_base;
>-    pm->pm_io_base = 0;
>+    if (!xen_enabled())
>+        pm->pm_io_base = 0;
>     ich9_pm_iospace_update(pm, pm_io_base);
>     return 0;
> }
>@@ -268,7 +275,10 @@ static void pm_reset(void *opaque)
>     acpi_pm1_evt_reset(&pm->acpi_regs);
>     acpi_pm1_cnt_reset(&pm->acpi_regs);
>     acpi_pm_tmr_reset(&pm->acpi_regs);
>-    acpi_gpe_reset(&pm->acpi_regs);
>+    /* Noticed guest freezing in xen when this was reset after S3. */
>+    if (!xen_enabled()) {
>+        acpi_gpe_reset(&pm->acpi_regs);
>+    }

I wonder why this seems to work with PIIX?

I'd rather try to keep the Xen impact on the device model as low as possible, 
ideally as low as in the PIIX4 ACPI device model.

> 
>     pm->smi_en = 0;
>     if (!pm->smm_enabled) {
>@@ -316,7 +326,7 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, 
>qemu_irq sci_irq)
>         acpi_pm_tco_init(&pm->tco_regs, &pm->io);
>     }
> 
>-    if (pm->acpi_pci_hotplug.use_acpi_hotplug_bridge) {
>+    if (pm->acpi_pci_hotplug.use_acpi_hotplug_bridge || xen_enabled()) {
>         acpi_pcihp_init(OBJECT(lpc_pci),
>                         &pm->acpi_pci_hotplug,
>                         pci_get_bus(lpc_pci),
>@@ -332,10 +342,14 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, 
>qemu_irq sci_irq)
>     pm->powerdown_notifier.notify = pm_powerdown_req;
>     qemu_register_powerdown_notifier(&pm->powerdown_notifier);
> 
>+    if (xen_enabled()) {
>+            acpi_set_pci_info(true);
>+    }
>+
>     legacy_acpi_cpu_hotplug_init(pci_address_space_io(lpc_pci),
>         OBJECT(lpc_pci), &pm->gpe_cpu, ICH9_CPU_HOTPLUG_IO_BASE);
> 
>-    if (pm->acpi_memory_hotplug.is_enabled) {
>+    if (pm->acpi_memory_hotplug.is_enabled || xen_enabled()) {
>         acpi_memory_hotplug_init(pci_address_space_io(lpc_pci), 
> OBJECT(lpc_pci),
>                                  &pm->acpi_memory_hotplug,
>                                  ACPI_MEMORY_HOTPLUG_BASE);
>diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
>index cdd6f775a1..5b065d670c 100644
>--- a/hw/acpi/pcihp.c
>+++ b/hw/acpi/pcihp.c
>@@ -40,6 +40,7 @@
> #include "qapi/error.h"
> #include "qom/qom-qobject.h"
> #include "trace.h"
>+#include "sysemu/xen.h"
> 
> #define ACPI_PCIHP_SIZE 0x0018
> #define PCI_UP_BASE 0x0000
>@@ -84,7 +85,8 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque)
>     bool is_bridge = IS_PCI_BRIDGE(br);
> 
>     /* hotplugged bridges can't be described in ACPI ignore them */
>-    if (qbus_is_hotpluggable(BUS(bus))) {
>+    /* Xen requires hotplugging to the root device, even on the Q35 chipset */
>+    if (qbus_is_hotpluggable(BUS(bus)) || xen_enabled()) {
>         if (!is_bridge || (!br->hotplugged && info->has_bridge_hotplug)) {
>             bus_bsel = g_malloc(sizeof *bus_bsel);
> 
>@@ -97,7 +99,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque)
>     return info;
> }
> 
>-static void acpi_set_pci_info(bool has_bridge_hotplug)
>+void acpi_set_pci_info(bool has_bridge_hotplug)
> {
>     static bool bsel_is_set;
>     Object *host = acpi_get_i386_pci_host();
>diff --git a/hw/core/machine.c b/hw/core/machine.c
>index 1000406211..703138d2ec 100644
>--- a/hw/core/machine.c
>+++ b/hw/core/machine.c
>@@ -455,6 +455,20 @@ static void machine_set_graphics(Object *obj, bool value, 
>Error **errp)
>     ms->enable_graphics = value;
> }
> 
>+static bool machine_get_xen_platform_dev(Object *obj, Error **errp)
>+{
>+    MachineState *ms = MACHINE(obj);
>+
>+    return ms->xen_platform_dev;
>+}
>+
>+static void machine_set_xen_platform_dev(Object *obj, bool value, Error 
>**errp)
>+{
>+    MachineState *ms = MACHINE(obj);
>+
>+    ms->xen_platform_dev = value;
>+}
>+
> static char *machine_get_firmware(Object *obj, Error **errp)
> {
>     MachineState *ms = MACHINE(obj);
>@@ -1004,6 +1018,11 @@ static void machine_class_init(ObjectClass *oc, void 
>*data)
>     object_class_property_set_description(oc, "graphics",
>         "Set on/off to enable/disable graphics emulation");
> 
>+    object_class_property_add_bool(oc, "xen-platform-dev",
>+        machine_get_xen_platform_dev, machine_set_xen_platform_dev);
>+    object_class_property_set_description(oc, "xen-platform-dev",
>+        "Set on/off to enable/disable Xen Platform device");
>+
>     object_class_property_add_str(oc, "firmware",
>         machine_get_firmware, machine_set_firmware);
>     object_class_property_set_description(oc, "firmware",
>diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
>index d5b0dcd1fe..8c1b20f3bc 100644
>--- a/hw/i386/pc_piix.c
>+++ b/hw/i386/pc_piix.c
>@@ -62,6 +62,7 @@
> #endif
> #include "hw/xen/xen-x86.h"
> #include "hw/xen/xen.h"
>+#include "sysemu/xen.h"
> #include "migration/global_state.h"
> #include "migration/misc.h"
> #include "sysemu/numa.h"
>@@ -233,7 +234,7 @@ static void pc_init1(MachineState *machine,
>                               x86ms->above_4g_mem_size,
>                               pci_memory, ram_memory);
>         pci_bus_map_irqs(pci_bus,
>-                         xen_enabled() ? xen_pci_slot_get_pirq
>+                         xen_enabled() ? xen_cmn_pci_slot_get_pirq
>                                        : pc_pci_slot_get_pirq);
>         pcms->bus = pci_bus;
> 
>diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
>index 6155427e48..81aa9e04f0 100644
>--- a/hw/i386/pc_q35.c
>+++ b/hw/i386/pc_q35.c
>@@ -57,10 +57,24 @@
> #include "hw/hyperv/vmbus-bridge.h"
> #include "hw/mem/nvdimm.h"
> #include "hw/i386/acpi-build.h"
>+#include "hw/xen/xen-x86.h"
>+#include "sysemu/xen.h"
> 
> /* ICH9 AHCI has 6 ports */
> #define MAX_SATA_PORTS     6
> 
>+static void q35_xen_hvm_init(MachineState *machine)
>+{
>+    PCMachineState *pcms = PC_MACHINE(machine);
>+
>+    if (xen_enabled()) {
>+        /* check if Xen Platform device is enabled */
>+        if (machine->xen_platform_dev) {
>+            pci_create_simple(pcms->bus, -1, "xen-platform");
>+        }
>+    }
>+}
>+
> struct ehci_companions {
>     const char *name;
>     int func;
>@@ -131,6 +145,7 @@ static void pc_q35_init(MachineState *machine)
>     MemoryRegion *system_io = get_system_io();
>     MemoryRegion *pci_memory;
>     MemoryRegion *rom_memory;
>+    MemoryRegion *ram_memory;
>     GSIState *gsi_state;
>     ISABus *isa_bus;
>     int i;
>@@ -182,8 +197,12 @@ static void pc_q35_init(MachineState *machine)
>     }
> 
>     pc_machine_init_sgx_epc(pcms);
>-    x86_cpus_init(x86ms, pcmc->default_cpu_version);
> 
>+    x86_cpus_init(x86ms, pcmc->default_cpu_version);
>+    if (xen_enabled()) {
>+        xen_hvm_init_pc(pcms, &ram_memory);
>+      machine->ram = ram_memory;
>+    }
>     kvmclock_create(pcmc->kvmclock_create_always);
> 
>     /* pci enabled */
>@@ -216,7 +235,15 @@ static void pc_q35_init(MachineState *machine)
>     }
> 
>     /* allocate ram and load rom/bios */
>-    pc_memory_init(pcms, system_memory, rom_memory, pci_hole64_size);
>+    if (!xen_enabled()) 
>+        pc_memory_init(pcms, system_memory, rom_memory, pci_hole64_size);
>+     else {
>+        pc_system_flash_cleanup_unused(pcms);
>+        if (machine->kernel_filename != NULL) {
>+            /* For xen HVM direct kernel boot, load linux here */
>+            xen_load_linux(pcms);
>+        }
>+    }
> 
>     object_property_add_child(OBJECT(machine), "q35", OBJECT(q35_host));
>     object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_RAM_MEM,
>@@ -273,8 +300,12 @@ static void pc_q35_init(MachineState *machine)
>     for (i = 0; i < IOAPIC_NUM_PINS; i++) {
>         qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, x86ms->gsi[i]);
>     }
>-    isa_bus = ISA_BUS(qdev_get_child_bus(lpc_dev, "isa.0"));

Why move? I'd only move if it makes code consistent with piix.

> 
>+    if (xen_enabled()) {
>+        q35_xen_hvm_init(machine);
>+    }
>+
>+    isa_bus = ISA_BUS(qdev_get_child_bus(lpc_dev, "isa.0"));
>     if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) {
>         pc_i8259_create(isa_bus, gsi_state->i8259_irq);
>     }
>@@ -289,7 +320,7 @@ static void pc_q35_init(MachineState *machine)
> 
>     assert(pcms->vmport != ON_OFF_AUTO__MAX);
>     if (pcms->vmport == ON_OFF_AUTO_AUTO) {
>-        pcms->vmport = ON_OFF_AUTO_ON;
>+        pcms->vmport = xen_enabled() ? ON_OFF_AUTO_OFF : ON_OFF_AUTO_ON;
>     }
> 
>     /* init basic PC hardware */
>diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
>index 56641a550e..540ac46639 100644
>--- a/hw/i386/xen/xen-hvm.c
>+++ b/hw/i386/xen/xen-hvm.c
>@@ -15,6 +15,7 @@
> #include "hw/pci/pci.h"
> #include "hw/pci/pci_host.h"
> #include "hw/i386/pc.h"
>+#include "hw/southbridge/ich9.h"
> #include "hw/irq.h"
> #include "hw/hw.h"
> #include "hw/i386/apic-msidef.h"
>@@ -136,14 +137,14 @@ typedef struct XenIOState {
>     Notifier wakeup;
> } XenIOState;
> 
>-/* Xen specific function for piix pci */
>+/* Xen-specific functions for pci dev IRQ handling */
> 
>-int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
>+int xen_cmn_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
> {
>     return irq_num + (PCI_SLOT(pci_dev->devfn) << 2);
> }
> 
>-void xen_piix3_set_irq(void *opaque, int irq_num, int level)

I think this function should have a different name meanwhile. Which would make 
renaming possibly unneccessary.

>+void xen_cmn_set_irq(void *opaque, int irq_num, int level)
> {
>     xen_set_pci_intx_level(xen_domid, 0, 0, irq_num >> 2,
>                            irq_num & 3, level);
>diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c
>index 57f1d742c1..0375337222 100644
>--- a/hw/i386/xen/xen_platform.c
>+++ b/hw/i386/xen/xen_platform.c
>@@ -34,6 +34,7 @@
> #include "sysemu/block-backend.h"
> #include "qemu/error-report.h"
> #include "qemu/module.h"
>+#include "include/hw/i386/pc.h"
> #include "qom/object.h"
> 
> #ifdef CONFIG_XEN
>@@ -223,6 +224,12 @@ static void unplug_disks(PCIBus *b, PCIDevice *d, void 
>*opaque)
>         if (flags & UNPLUG_NVME_DISKS) {
>             object_unparent(OBJECT(d));
>         }
>+        break;
>+
>+    case PCI_CLASS_STORAGE_SATA:
>+      if (!aux) {
>+            object_unparent(OBJECT(d));
>+        }
> 
>     default:
>         break;
>@@ -231,7 +238,17 @@ static void unplug_disks(PCIBus *b, PCIDevice *d, void 
>*opaque)
> 
> static void pci_unplug_disks(PCIBus *bus, uint32_t flags)
> {
>-    pci_for_each_device(bus, 0, unplug_disks, &flags);
>+    PCIBus *q35 = find_q35();
>+    if (q35) {
>+        /* When q35 is detected, we will remove the ahci controller
>+       * with the hard disks.  In the libxl config, cdrom devices
>+       * are put on a seperate ahci controller. This allows for 6 cdrom
>+       * devices to be added, and 6 qemu hard disks.
>+       */
>+        pci_function_for_one_bus(bus, unplug_disks, &flags);
>+    } else {
>+        pci_for_each_device(bus, 0, unplug_disks, &flags);
>+    }
> }
> 
> static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, 
> uint32_t val)
>diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
>index 9c47a2f6c7..fbda6085d0 100644
>--- a/hw/isa/lpc_ich9.c
>+++ b/hw/isa/lpc_ich9.c
>@@ -51,6 +51,9 @@
> #include "hw/core/cpu.h"
> #include "hw/nvram/fw_cfg.h"
> #include "qemu/cutils.h"
>+#include "hw/xen/xen.h"
>+#include "sysemu/xen.h"
>+#include "hw/southbridge/piix.h"
> #include "hw/acpi/acpi_aml_interface.h"
> #include "trace.h"
> 
>@@ -535,11 +538,49 @@ static int ich9_lpc_post_load(void *opaque, int 
>version_id)
>     return 0;
> }
> 
>+static void ich9_lpc_config_write_xen(PCIDevice *d,
>+                                  uint32_t addr, uint32_t val, int len)
>+{   
>+    static bool pirqe_f_warned = false;
>+    if (ranges_overlap(addr, len, ICH9_LPC_PIRQA_ROUT, 4)) {
>+        /* handle PIRQA..PIRQD routing */
>+        /* Scan for updates to PCI link routes (0x60-0x63). */
>+        int i; 
>+        for (i = 0; i < len; i++) {
>+            uint8_t v = (val >> (8 * i)) & 0xff;
>+            if (v & 0x80) {
>+                v = 0;
>+            } 
>+            v &= 0xf;
>+            if (((addr + i) >= PIIX_PIRQCA) && ((addr + i) <= PIIX_PIRQCD)) {
>+                xen_set_pci_link_route(addr + i - PIIX_PIRQCA, v);
>+            }
>+        }
>+    } else if (ranges_overlap(addr, len, ICH9_LPC_PIRQE_ROUT, 4)) {
>+        while (len--) {
>+            if (range_covers_byte(ICH9_LPC_PIRQE_ROUT, 4, addr) &&
>+                (val & 0x80) == 0) {
>+                /* print warning only once */
>+                if (!pirqe_f_warned) {
>+                    pirqe_f_warned = true;
>+                    fprintf(stderr, "WARNING: guest domain attempted to use 
>PIRQ%c "
>+                            "routing which is not supported for Xen/Q35 
>currently\n",
>+                            (char)(addr - ICH9_LPC_PIRQE_ROUT + 'E'));
>+                    break;
>+                }
>+            }
>+            addr++, val >>= 8;
>+        }
>+    }
>+}
>+

Can we do the same trick here we're doing in piix -- by subscribing to PCI IRQ 
routing change events? The goal would be to leave LPC Xen-agnostic.

> static void ich9_lpc_config_write(PCIDevice *d,
>                                   uint32_t addr, uint32_t val, int len)
> {
>     ICH9LPCState *lpc = ICH9_LPC_DEVICE(d);
>     uint32_t rcba_old = pci_get_long(d->config + ICH9_LPC_RC
>+    if (xen_enabled())
>+        ich9_lpc_config_write_xen(d, addr, val, len);

You could emit a PCI routing change event here unconditionally and subscribe to 
it in pc_q35.

> 
>     pci_default_write_config(d, addr, val, len);
>     if (ranges_overlap(addr, len, ICH9_LPC_PMBASE, 4) ||
>@@ -731,10 +772,14 @@ static void ich9_lpc_realize(PCIDevice *d, Error **errp)
>         return;
>     }
> 
>-    pci_bus_irqs(pci_bus, ich9_lpc_set_irq, d, ICH9_LPC_NB_PIRQS);
>-    pci_bus_map_irqs(pci_bus, ich9_lpc_map_irq);
>-    pci_bus_set_route_irq_fn(pci_bus, ich9_route_intx_pin_to_irq);
>-
>+    if (xen_enabled()) {
>+        pci_bus_irqs(pci_bus, xen_cmn_set_irq, d, ICH9_XEN_NUM_IRQ_SOURCES);
>+        pci_bus_map_irqs(pci_bus, xen_cmn_pci_slot_get_pirq);
>+    } else {
>+        pci_bus_irqs(pci_bus, ich9_lpc_set_irq, d, ICH9_LPC_NB_PIRQS);
>+        pci_bus_map_irqs(pci_bus, ich9_lpc_map_irq);
>+        pci_bus_set_route_irq_fn(pci_bus, ich9_route_intx_pin_to_irq);
>+    }

This could probably be wired up Xen-agnostic as well. See pc_piix for 
inspiration.

>     ich9_lpc_pm_init(lpc);
> }
> 
>diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c
>index f9103ea45a..3d0545eb0e 100644
>--- a/hw/isa/piix3.c
>+++ b/hw/isa/piix3.c
>@@ -420,7 +420,7 @@ static void piix3_xen_realize(PCIDevice *dev, Error **errp)
>      * connected to the IOAPIC directly.
>      * These additional routes can be discovered through ACPI.
>      */
>-    pci_bus_irqs(pci_bus, xen_piix3_set_irq, piix3, XEN_PIIX_NUM_PIRQS);
>+    pci_bus_irqs(pci_bus, xen_cmn_set_irq, piix3, XEN_PIIX_NUM_PIRQS);

This code has changed recently. PIIX3 doesn't know about Xen any more. Take 
some inspiration from there to keep LPC Xen-agnostic.

That's it for now from my side.

Best regards,
Bernhard

> }
> 
> static void piix3_xen_class_init(ObjectClass *klass, void *data)
>diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
>index fd18920e7f..3bb6325fb3 100644
>--- a/hw/pci-host/q35.c
>+++ b/hw/pci-host/q35.c
>@@ -37,6 +37,7 @@
> #include "qapi/error.h"
> #include "qapi/visitor.h"
> #include "qemu/module.h"
>+#include "sysemu/xen.h"
> 
> /****************************************************************************
>  * Q35 host
>@@ -259,6 +260,15 @@ static void q35_host_initfn(Object *obj)
>                              qdev_prop_allow_set_link_before_realize, 0);
> }
> 
>+PCIBus *find_q35(void)
>+{
>+    PCIHostState *s = OBJECT_CHECK(PCIHostState,
>+                                   object_resolve_path("/machine/q35", NULL),
>+                                   TYPE_PCI_HOST_BRIDGE);
>+    return s ? s->bus : NULL;
>+}
>+
>+
> static const TypeInfo q35_host_info = {
>     .name       = TYPE_Q35_HOST_DEVICE,
>     .parent     = TYPE_PCIE_HOST_BRIDGE,
>@@ -315,12 +325,21 @@ static void mch_update_pciexbar(MCHPCIState *mch)
>         break;
>     case MCH_HOST_BRIDGE_PCIEXBAR_LENGTH_128M:
>         length = 128 * 1024 * 1024;
>-        addr_mask |= MCH_HOST_BRIDGE_PCIEXBAR_128ADMSK |
>-            MCH_HOST_BRIDGE_PCIEXBAR_64ADMSK;
>+      if (!xen_enabled()) {
>+            addr_mask |= MCH_HOST_BRIDGE_PCIEXBAR_128ADMSK |
>+                MCH_HOST_BRIDGE_PCIEXBAR_64ADMSK;
>+      } else {
>+            addr_mask |= MCH_HOST_BRIDGE_PCIEXBAR_128ADMSK;
>+        }
>         break;
>     case MCH_HOST_BRIDGE_PCIEXBAR_LENGTH_64M:
>         length = 64 * 1024 * 1024;
>-        addr_mask |= MCH_HOST_BRIDGE_PCIEXBAR_64ADMSK;
>+      if (!xen_enabled()) {
>+            addr_mask |= MCH_HOST_BRIDGE_PCIEXBAR_64ADMSK;
>+      } else {
>+            addr_mask |= MCH_HOST_BRIDGE_PCIEXBAR_64ADMSK |
>+                MCH_HOST_BRIDGE_PCIEXBAR_128ADMSK;    
>+        }
>         break;
>     case MCH_HOST_BRIDGE_PCIEXBAR_LENGTH_RVD:
>         qemu_log_mask(LOG_GUEST_ERROR, "Q35: Reserved PCIEXBAR LENGTH\n");
>@@ -561,7 +580,8 @@ static void mch_reset(DeviceState *qdev)
>     d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0;
>     d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff;
> 
>-    mch_update(mch);
>+    if (!xen_enabled())
>+        mch_update(mch);
> }
> 
> static void mch_realize(PCIDevice *d, Error **errp)
>diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>index 1cc7c89036..8eac3d751a 100644
>--- a/hw/pci/pci.c
>+++ b/hw/pci/pci.c
>@@ -1815,6 +1815,23 @@ void pci_for_each_device_reverse(PCIBus *bus, int 
>bus_num,
>     }
> }
> 
>+void pci_function_for_one_bus(PCIBus *bus,
>+                          void (*fn)(PCIBus *b, PCIDevice *d, void *opaque),
>+                          void *opaque)
>+{
>+    bus = pci_find_bus_nr(bus, 0);
>+
>+    if (bus) {
>+        PCIDevice *d;
>+
>+        d = bus->devices[PCI_DEVFN(4,0)];
>+        if (d) {
>+            fn(bus, d, opaque);
>+            return;
>+        }
>+    }
>+}
>+
> void pci_for_each_device_under_bus(PCIBus *bus,
>                                    pci_bus_dev_fn fn, void *opaque)
> {
>diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c
>index 8c6e9a1716..63481a859e 100644
>--- a/hw/xen/xen-host-pci-device.c
>+++ b/hw/xen/xen-host-pci-device.c
>@@ -32,6 +32,7 @@
> 
> #define IORESOURCE_PREFETCH     0x00001000      /* No side effects */
> #define IORESOURCE_MEM_64       0x00100000
>+#define XEN_HOST_PCI_CAP_MAX    48
> 
> static void xen_host_pci_sysfs_path(const XenHostPCIDevice *d,
>                                     const char *name, char *buf, ssize_t size)
>@@ -198,6 +199,19 @@ static bool xen_host_pci_dev_is_virtfn(XenHostPCIDevice 
>*d)
>     return !stat(path, &buf);
> }
> 
>+static bool xen_host_pci_dev_has_pcie_ext_caps(XenHostPCIDevice *d)
>+{
>+    uint32_t header;
>+
>+    if (xen_host_pci_get_long(d, PCI_CONFIG_SPACE_SIZE, &header))
>+        return false;
>+
>+    if (header == 0 || header == ~0U)
>+        return false;
>+
>+    return true;
>+}
>+
> static void xen_host_pci_config_open(XenHostPCIDevice *d, Error **errp)
> {
>     char path[PATH_MAX];
>@@ -296,37 +310,93 @@ int xen_host_pci_set_block(XenHostPCIDevice *d, int pos, 
>uint8_t *buf, int len)
>     return xen_host_pci_config_write(d, pos, buf, len);
> }
> 
>-int xen_host_pci_find_ext_cap_offset(XenHostPCIDevice *d, uint32_t cap)
>+int xen_host_pci_find_next_ext_cap(XenHostPCIDevice *d, int pos, uint32_t cap)
> {
>     uint32_t header = 0;
>     int max_cap = XEN_HOST_PCI_MAX_EXT_CAP;
>-    int pos = PCI_CONFIG_SPACE_SIZE;
>+
>+    if (!d->has_pcie_ext_caps)
>+        return 0;
>+
>+    if (!pos) {
>+        pos = PCI_CONFIG_SPACE_SIZE;
>+    } else {
>+        if (xen_host_pci_get_long(d, pos, &header))
>+            return 0;
>+
>+        pos = PCI_EXT_CAP_NEXT(header);
>+    }
> 
>     do {
>+        if (!pos || pos < PCI_CONFIG_SPACE_SIZE) {
>+            break;
>+        }
>+
>         if (xen_host_pci_get_long(d, pos, &header)) {
>             break;
>         }
>         /*
>          * If we have no capabilities, this is indicated by cap ID,
>          * cap version and next pointer all being 0.
>+       * Also check for all F's returned (which means PCIe ext conf space
>+       * is unreadable for some reason)
>          */
>-        if (header == 0) {
>+      if (header == 0 || header == ~0U) {
>             break;
>         }
> 
>-        if (PCI_EXT_CAP_ID(header) == cap) {
>+        if (cap == CAP_ID_ANY) {
>+            return pos;
>+        } else if (PCI_EXT_CAP_ID(header) == cap) {
>             return pos;
>         }
> 
>         pos = PCI_EXT_CAP_NEXT(header);
>-        if (pos < PCI_CONFIG_SPACE_SIZE) {
>+    } while (--max_cap);
>+
>+    return 0;
>+}
>+
>+int xen_host_pci_find_next_cap(XenHostPCIDevice *d, int pos, uint32_t cap)
>+{
>+    uint8_t id;
>+    unsigned max_cap = XEN_HOST_PCI_CAP_MAX;
>+    uint8_t status = 0;
>+    uint8_t curpos;
>+
>+    if (xen_host_pci_get_byte(d, PCI_STATUS, &status))
>+        return 0;
>+
>+    if ((status & PCI_STATUS_CAP_LIST) == 0)
>+        return 0;
>+
>+    if (pos < PCI_CAPABILITY_LIST) {
>+        curpos = PCI_CAPABILITY_LIST;
>+    } else {
>+        curpos = (uint8_t) pos;
>+    }
>+
>+    while (max_cap--) {
>+        if (xen_host_pci_get_byte(d, curpos, &curpos))
>+             break;
>+        if (!curpos)
>+             break;
>+
>+        if (cap == CAP_ID_ANY)
>+            return curpos;
>+
>+        if (xen_host_pci_get_byte(d, curpos + PCI_CAP_LIST_ID, &id))
>             break;
>-        }
> 
>-        max_cap--;
>-    } while (max_cap > 0);
>+        if (id == 0xff)
>+            break;
>+        else if (id == cap)
>+            return curpos;
>+
>+        curpos += PCI_CAP_LIST_NEXT;
>+    }
> 
>-    return -1;
>+    return 0;
> }
> 
> void xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t domain,
>@@ -335,6 +405,7 @@ void xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t 
>domain,
> {
>     ERRP_GUARD();
>     unsigned int v;
>+    int pcie_cap_pos;
> 
>     d->config_fd = -1;
>     d->domain = domain;
>@@ -376,7 +447,22 @@ void xen_host_pci_device_get(XenHostPCIDevice *d, 
>uint16_t domain,
>     }
>     d->class_code = v;
> 
>-    d->is_virtfn = xen_host_pci_dev_is_virtfn(d);
>+    d->is_virtfn         = xen_host_pci_dev_is_virtfn(d);
>+    d->has_pcie_ext_caps = xen_host_pci_dev_has_pcie_ext_caps(d);
>+
>+    /* read and store PCIe Capabilities field for later use */
>+    pcie_cap_pos = xen_host_pci_find_next_cap(d, 0, PCI_CAP_ID_EXP);
>+
>+    if (pcie_cap_pos) {
>+        if (xen_host_pci_get_word(d, pcie_cap_pos + PCI_EXP_FLAGS,
>+                                  &d->pcie_flags)) {
>+            error_setg(errp, "Unable to read from PCI Express capability "
>+                       "structure at 0x%x", pcie_cap_pos);
>+            goto error;
>+        }
>+    } else {
>+        d->pcie_flags = 0xFFFF;
>+    }
> 
>     return;
> 
>diff --git a/hw/xen/xen-host-pci-device.h b/hw/xen/xen-host-pci-device.h
>index 4d8d34ecb0..2884c4b4b9 100644
>--- a/hw/xen/xen-host-pci-device.h
>+++ b/hw/xen/xen-host-pci-device.h
>@@ -27,11 +27,13 @@ typedef struct XenHostPCIDevice {
>     uint16_t device_id;
>     uint32_t class_code;
>     int irq;
>+    uint16_t pcie_flags;
> 
>     XenHostPCIIORegion io_regions[PCI_NUM_REGIONS - 1];
>     XenHostPCIIORegion rom;
> 
>     bool is_virtfn;
>+    bool has_pcie_ext_caps;
> 
>     int config_fd;
> } XenHostPCIDevice;
>@@ -53,6 +55,8 @@ int xen_host_pci_set_long(XenHostPCIDevice *d, int pos, 
>uint32_t data);
> int xen_host_pci_set_block(XenHostPCIDevice *d, int pos, uint8_t *buf,
>                            int len);
> 
>-int xen_host_pci_find_ext_cap_offset(XenHostPCIDevice *s, uint32_t cap);
>+#define CAP_ID_ANY  (~0U)
>+int xen_host_pci_find_next_cap(XenHostPCIDevice *s, int pos, uint32_t cap);
>+int xen_host_pci_find_next_ext_cap(XenHostPCIDevice *d, int pos, uint32_t 
>cap);
> 
> #endif /* XEN_HOST_PCI_DEVICE_H */
>diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
>index a540149639..2399fabb2b 100644
>--- a/hw/xen/xen_pt.c
>+++ b/hw/xen/xen_pt.c
>@@ -96,8 +96,16 @@ void xen_pt_log(const PCIDevice *d, const char *f, ...)
> 
> static int xen_pt_pci_config_access_check(PCIDevice *d, uint32_t addr, int 
> len)
> {
>+    XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
>     /* check offset range */
>-    if (addr > 0xFF) {
>+    if (s->pcie_enabled_dev) {
>+        if (addr >= PCIE_CONFIG_SPACE_SIZE) {
>+            XEN_PT_ERR(d, "Failed to access register with offset "
>+                          "exceeding 0xFFF. (addr: 0x%02x, len: %d)\n",
>+                          addr, len);
>+            return -1;
>+        }
>+    } else if (addr >= PCI_CONFIG_SPACE_SIZE) {
>         XEN_PT_ERR(d, "Failed to access register with offset exceeding 0xFF. "
>                    "(addr: 0x%02x, len: %d)\n", addr, len);
>         return -1;
>@@ -156,7 +164,16 @@ static uint32_t xen_pt_pci_read_config(PCIDevice *d, 
>uint32_t addr, int len)
>     reg_grp_entry = xen_pt_find_reg_grp(s, addr);
>     if (reg_grp_entry) {
>         /* check 0-Hardwired register group */
>-        if (reg_grp_entry->reg_grp->grp_type == XEN_PT_GRP_TYPE_HARDWIRED) {
>+        if (reg_grp_entry->reg_grp->grp_type == XEN_PT_GRP_TYPE_HARDWIRED &&
>+            /*
>+             * For PCIe Extended Capabilities we need to emulate
>+             * CapabilityID and NextCapability/Version registers for a
>+             * hardwired reg group located at the offset 0x100 in PCIe
>+             * config space. This allows us to hide the first extended
>+             * capability as well.
>+             */
>+            !(reg_grp_entry->base_offset == PCI_CONFIG_SPACE_SIZE &&
>+            ranges_overlap(addr, len, 0x100, 4))) {
>             /* no need to emulate, just return 0 */
>             val = 0;
>             goto exit;
>@@ -701,6 +718,21 @@ static const MemoryListener xen_pt_io_listener = {
>     .priority = 10,
> };
> 
>+static inline bool xen_pt_dev_is_pcie_mode(PCIDevice *d)
>+{
>+    XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
>+    PCIBus *bus = pci_get_bus(d);
>+
>+    if (bus != NULL) {
>+        if (pci_is_express(d) && pci_bus_is_express(bus) &&
>+            xen_host_pci_find_next_cap(&s->real_device, 0, PCI_CAP_ID_EXP)) {
>+            return true;
>+        }
>+    }
>+
>+    return false;
>+}
>+
> /* destroy. */
> static void xen_pt_destroy(PCIDevice *d) {
> 
>@@ -787,8 +819,17 @@ static void xen_pt_realize(PCIDevice *d, Error **errp)
>                    s->real_device.dev, s->real_device.func);
>     }
> 
>-    /* Initialize virtualized PCI configuration (Extended 256 Bytes) */
>-    memset(d->config, 0, PCI_CONFIG_SPACE_SIZE);
>+    s->pcie_enabled_dev = xen_pt_dev_is_pcie_mode(d);
>+    if (s->pcie_enabled_dev) {
>+        XEN_PT_LOG(d, "Host device %04x:%02x:%02x.%d passed thru "
>+                   "in PCIe mode\n", s->real_device.domain,
>+                    s->real_device.bus, s->real_device.dev,
>+                    s->real_device.func);
>+    }
>+
>+    /* Initialize virtualized PCI configuration space (256/4K bytes) */
>+    memset(d->config, 0, pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE
>+                                           : PCI_CONFIG_SPACE_SIZE);
> 
>     s->memory_listener = xen_pt_memory_listener;
>     s->io_listener = xen_pt_io_listener;
>diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h
>index b20744f7c7..0ed6e77e76 100644
>--- a/hw/xen/xen_pt.h
>+++ b/hw/xen/xen_pt.h
>@@ -33,6 +33,11 @@ void xen_pt_log(const PCIDevice *d, const char *f, ...) 
>G_GNUC_PRINTF(2, 3);
> /* Helper */
> #define XEN_PFN(x) ((x) >> XC_PAGE_SHIFT)
> 
>+/* Macro's for PCIe Extended Capabilities */
>+#define PCIE_EXT_CAP_ID(cap_id)     ((cap_id) | (1U << 16))
>+#define IS_PCIE_EXT_CAP_ID(grp_id)  ((grp_id) & (1U << 16))
>+#define GET_PCIE_EXT_CAP_ID(grp_id) ((grp_id) & 0xFFFF)
>+
> typedef const struct XenPTRegInfo XenPTRegInfo;
> typedef struct XenPTReg XenPTReg;
> 
>@@ -88,6 +93,11 @@ typedef int (*xen_pt_conf_byte_read)
> 
> #define XEN_PCI_INTEL_OPREGION 0xfc
> 
>+#define XEN_PCIE_CAP_ID 0
>+#define XEN_PCIE_CAP_LIST_NEXT 2
>+#define XEN_PCIE_FAKE_CAP_ID_BASE 0xFE00
>+
>+
> #define XEN_PCI_IGD_DOMAIN 0
> #define XEN_PCI_IGD_BUS 0
> #define XEN_PCI_IGD_DEV 2
>@@ -174,13 +184,13 @@ typedef const struct XenPTRegGroupInfo XenPTRegGroupInfo;
> /* emul reg group size initialize method */
> typedef int (*xen_pt_reg_size_init_fn)
>     (XenPCIPassthroughState *, XenPTRegGroupInfo *,
>-     uint32_t base_offset, uint8_t *size);
>+     uint32_t base_offset, uint32_t *size);
> 
> /* emulated register group information */
> struct XenPTRegGroupInfo {
>-    uint8_t grp_id;
>+    uint32_t grp_id;
>     XenPTRegisterGroupType grp_type;
>-    uint8_t grp_size;
>+    uint32_t grp_size;
>     xen_pt_reg_size_init_fn size_init;
>     XenPTRegInfo *emu_regs;
> };
>@@ -190,7 +200,7 @@ typedef struct XenPTRegGroup {
>     QLIST_ENTRY(XenPTRegGroup) entries;
>     XenPTRegGroupInfo *reg_grp;
>     uint32_t base_offset;
>-    uint8_t size;
>+    uint32_t size;
>     QLIST_HEAD(, XenPTReg) reg_tbl_list;
> } XenPTRegGroup;
> 
>@@ -234,6 +244,7 @@ struct XenPCIPassthroughState {
> 
>     PCIHostDeviceAddress hostaddr;
>     bool is_virtfn;
>+    bool pcie_enabled_dev;
>     bool permissive;
>     bool permissive_warned;
>     XenHostPCIDevice real_device;
>diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
>index 2b8680b112..cc583f9cc1 100644
>--- a/hw/xen/xen_pt_config_init.c
>+++ b/hw/xen/xen_pt_config_init.c
>@@ -16,6 +16,7 @@
> #include "qapi/error.h"
> #include "qemu/timer.h"
> #include "xen_pt.h"
>+#include "xen-host-pci-device.h"
> #include "hw/xen/xen-legacy-backend.h"
> 
> #define XEN_PT_MERGE_VALUE(value, data, val_mask) \
>@@ -27,33 +28,52 @@
> 
> static int xen_pt_ptr_reg_init(XenPCIPassthroughState *s, XenPTRegInfo *reg,
>                                uint32_t real_offset, uint32_t *data);
>-
>+static int xen_pt_ext_cap_ptr_reg_init(XenPCIPassthroughState *s,
>+                                       XenPTRegInfo *reg,
>+                                       uint32_t real_offset,
>+                                       uint32_t *data);
>+static int xen_pt_ext_cap_capid_reg_init(XenPCIPassthroughState *s,
>+                                         XenPTRegInfo *reg,
>+                                         uint32_t real_offset,
>+                                         uint32_t *data);
> 
> /* helper */
> 
> /* A return value of 1 means the capability should NOT be exposed to guest. */
>-static int xen_pt_hide_dev_cap(const XenHostPCIDevice *d, uint8_t grp_id)
>-{
>-    switch (grp_id) {
>-    case PCI_CAP_ID_EXP:
>-        /* The PCI Express Capability Structure of the VF of Intel 82599 10GbE
>-         * Controller looks trivial, e.g., the PCI Express Capabilities
>-         * Register is 0. We should not try to expose it to guest.
>-         *
>-         * The datasheet is available at
>-         * 
>http://download.intel.com/design/network/datashts/82599_datasheet.pdf
>-         *
>-         * See 'Table 9.7. VF PCIe Configuration Space' of the datasheet, the
>-         * PCI Express Capability Structure of the VF of Intel 82599 10GbE
>-         * Controller looks trivial, e.g., the PCI Express Capabilities
>-         * Register is 0, so the Capability Version is 0 and
>-         * xen_pt_pcie_size_init() would fail.
>-         */
>-        if (d->vendor_id == PCI_VENDOR_ID_INTEL &&
>-            d->device_id == PCI_DEVICE_ID_INTEL_82599_SFP_VF) {
>-            return 1;
>+static int xen_pt_hide_dev_cap(const XenHostPCIDevice *d, uint32_t grp_id)

Reply via email to