Allocate new segments for pxb-pcie host bridges in the MCFG table, and reserve
the corresponding MCFG space for them. This allows user-defined pxb-pcie
host bridges to be placed in a different PCI domain than the q35 host.

The pci_host_bridges list is changed to a tail queue to ensure that the q35
host is always the first element when traversing the list, because the q35
host is inserted before the pxb-pcie hosts.

Signed-off-by: Zihan Yang <whois.zihan.y...@gmail.com>
---
 hw/i386/acpi-build.c                        | 116 +++++++++++++++++++++++-----
 hw/i386/pc.c                                |  14 +++-
 hw/pci-bridge/pci_expander_bridge.c         |  57 ++++++++++----
 hw/pci-host/q35.c                           |   2 +
 hw/pci/pci.c                                |   9 ++-
 include/hw/i386/pc.h                        |   1 +
 include/hw/pci-bridge/pci_expander_bridge.h |  11 +++
 include/hw/pci-host/q35.h                   |   1 +
 include/hw/pci/pci_host.h                   |   2 +-
 9 files changed, 169 insertions(+), 44 deletions(-)
 create mode 100644 include/hw/pci-bridge/pci_expander_bridge.h

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index e1ee8ae..c0fc2b4 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -55,6 +55,7 @@
 #include "hw/i386/ich9.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/pci-host/q35.h"
+#include "hw/pci-bridge/pci_expander_bridge.h"
 #include "hw/i386/x86-iommu.h"
 
 #include "hw/acpi/aml-build.h"
@@ -89,6 +90,9 @@
 typedef struct AcpiMcfgInfo {
     uint64_t mcfg_base;
     uint32_t mcfg_size;
+    uint32_t domain_nr;
+    uint8_t bus_nr; // start bus number
+    struct AcpiMcfgInfo *next;
 } AcpiMcfgInfo;
 
 typedef struct AcpiPmInfo {
@@ -2431,14 +2435,16 @@ build_mcfg_q35(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info)
 {
     AcpiTableMcfg *mcfg;
     const char *sig;
-    int len = sizeof(*mcfg) + 1 * sizeof(mcfg->allocation[0]);
+    int len, count = 0;
+    AcpiMcfgInfo *cfg = info;
+
+    while (cfg) {
+        ++count;
+        cfg = cfg->next;
+    }
+    len = sizeof(*mcfg) + count * sizeof(mcfg->allocation[0]);
 
     mcfg = acpi_data_push(table_data, len);
-    mcfg->allocation[0].address = cpu_to_le64(info->mcfg_base);
-    /* Only a single allocation so no need to play with segments */
-    mcfg->allocation[0].pci_segment = cpu_to_le16(0);
-    mcfg->allocation[0].start_bus_number = 0;
-    mcfg->allocation[0].end_bus_number = PCIE_MMCFG_BUS(info->mcfg_size - 1);
 
     /* MCFG is used for ECAM which can be enabled or disabled by guest.
      * To avoid table size changes (which create migration issues),
@@ -2452,6 +2458,17 @@ build_mcfg_q35(GArray *table_data, BIOSLinker *linker, AcpiMcfgInfo *info)
     } else {
         sig = "MCFG";
     }
+
+    count = 0;
+    while (info) {
+        mcfg[0].allocation[count].address = cpu_to_le64(info->mcfg_base);
+        mcfg[0].allocation[count].pci_segment = cpu_to_le16(info->domain_nr);
+        mcfg[0].allocation[count].start_bus_number = info->bus_nr;
+        mcfg[0].allocation[count++].end_bus_number = info->bus_nr + \
+                                    PCIE_MMCFG_BUS(info->mcfg_size - 1);
+        info = info->next;
+    }
+
     build_header(linker, table_data, (void *)mcfg, sig, len, 1, NULL, NULL);
 }
 
@@ -2606,26 +2623,83 @@ struct AcpiBuildState {
     MemoryRegion *linker_mr;
 } AcpiBuildState;
 
-static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg)
+static inline void cleanup_mcfg(AcpiMcfgInfo *mcfg)
+{
+    AcpiMcfgInfo *tmp;
+    while (mcfg) {
+        tmp = mcfg->next;
+        g_free(mcfg);
+        mcfg = tmp;
+    }
+}
+
+static AcpiMcfgInfo *acpi_get_mcfg(void)
 {
     Object *pci_host;
     QObject *o;
+    uint32_t domain_nr;
+    AcpiMcfgInfo *head = NULL, *tail, *mcfg;
 
     pci_host = acpi_get_i386_pci_host();
     g_assert(pci_host);
 
-    o = object_property_get_qobject(pci_host, PCIE_HOST_MCFG_BASE, NULL);
-    if (!o) {
-        return false;
+    while (pci_host) {
+        /* pxb-pcie-hosts does not have domain_nr property, but a link
+         * to PXBDev. We first try to get pxbdev property, if NULL,
+         * then it is q35 host, otherwise it is pxb-pcie-host */
+        Object *obj = object_property_get_link(pci_host,
+                                           PROP_PXB_PCIE_DEV, NULL);
+        if (!obj) {
+            /* we are in q35 host */
+            obj = pci_host;
+        }
+        o = object_property_get_qobject(obj, PROP_PXB_PCIE_DOMAIN_NR, NULL);
+        assert(o);
+        domain_nr = qnum_get_uint(qobject_to(QNum, o));
+        qobject_unref(o);
+
+        /* Skip bridges that reside in the same domain with q35 host.
+         * Q35 always stays in pci domain 0, and is the first element
+         * in the pci_host_bridges list */
+        if (head && domain_nr == 0) {
+            pci_host = OBJECT(QTAILQ_NEXT(PCI_HOST_BRIDGE(pci_host), next));
+            continue;
+        }
+
+        mcfg = g_new0(AcpiMcfgInfo, 1);
+        mcfg->next = NULL;
+        if (!head) {
+            tail = head = mcfg;
+        } else {
+            tail->next = mcfg;
+            tail = mcfg;
+        }
+        mcfg->domain_nr = domain_nr;
+
+        o = object_property_get_qobject(pci_host, PCIE_HOST_MCFG_BASE, NULL);
+        assert(o);
+        mcfg->mcfg_base = qnum_get_uint(qobject_to(QNum, o));
+        qobject_unref(o);
+
+        /* firmware will overwrite it */
+        o = object_property_get_qobject(pci_host, PCIE_HOST_MCFG_SIZE, NULL);
+        assert(o);
+        mcfg->mcfg_size = qnum_get_uint(qobject_to(QNum, o));
+        qobject_unref(o);
+
+        o = object_property_get_qobject(obj, PROP_PXB_BUS_NR, NULL);
+        if (!o) {
+            /* we are in q35 host again */
+            mcfg->bus_nr = 0;
+        } else {
+            mcfg->bus_nr = qnum_get_uint(qobject_to(QNum, o));
+            qobject_unref(o);
+        }
+
+        pci_host = OBJECT(QTAILQ_NEXT(PCI_HOST_BRIDGE(pci_host), next));
     }
-    mcfg->mcfg_base = qnum_get_uint(qobject_to(QNum, o));
-    qobject_unref(o);
 
-    o = object_property_get_qobject(pci_host, PCIE_HOST_MCFG_SIZE, NULL);
-    assert(o);
-    mcfg->mcfg_size = qnum_get_uint(qobject_to(QNum, o));
-    qobject_unref(o);
-    return true;
+    return head;
 }
 
 static
@@ -2637,7 +2711,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
     unsigned facs, dsdt, rsdt, fadt;
     AcpiPmInfo pm;
     AcpiMiscInfo misc;
-    AcpiMcfgInfo mcfg;
+    AcpiMcfgInfo *mcfg;
     Range pci_hole, pci_hole64;
     uint8_t *u;
     size_t aml_len = 0;
@@ -2718,10 +2792,12 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
             build_slit(tables_blob, tables->linker);
         }
     }
-    if (acpi_get_mcfg(&mcfg)) {
+    if ((mcfg = acpi_get_mcfg()) != NULL) {
         acpi_add_table(table_offsets, tables_blob);
-        build_mcfg_q35(tables_blob, tables->linker, &mcfg);
+        build_mcfg_q35(tables_blob, tables->linker, mcfg);
     }
+    cleanup_mcfg(mcfg);
+
     if (x86_iommu_get_default()) {
         IommuType IOMMUType = x86_iommu_get_type();
         if (IOMMUType == TYPE_AMD) {
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 83a4444..a7e51af 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -35,6 +35,7 @@
 #include "hw/ide.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/pci_bus.h"
+#include "hw/pci-bridge/pci_expander_bridge.h"
 #include "hw/nvram/fw_cfg.h"
 #include "hw/timer/hpet.h"
 #include "hw/smbios/smbios.h"
@@ -1470,15 +1471,24 @@ uint64_t pc_pci_hole64_start(void)
     if (pcmc->has_reserved_memory && ms->device_memory->base) {
         hole64_start = ms->device_memory->base;
         if (!pcmc->broken_reserved_end) {
-            hole64_start += memory_region_size(&ms->device_memory->mr);
+            hole64_start += (memory_region_size(&ms->device_memory->mr) + \
+                             pxb_pcie_mcfg_hole());
         }
     } else {
-        hole64_start = 0x100000000ULL + pcms->above_4g_mem_size;
+        /* memory layout [RAM Hotplug][MCFG][..ROUND UP..][PCI HOLE] */
+        hole64_start = pc_pci_mcfg_start() + pxb_pcie_mcfg_hole();
     }
 
     return ROUND_UP(hole64_start, 1 * GiB);
 }
 
+uint64_t pc_pci_mcfg_start(void)
+{
+    PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
+
+    return ROUND_UP(0x100000000ULL + pcms->above_4g_mem_size, 4 * KiB);
+}
+
 qemu_irq pc_allocate_cpu_irq(void)
 {
     return qemu_allocate_irq(pic_irq_request, NULL, 0);
diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
index 6dd38de..f50938f 100644
--- a/hw/pci-bridge/pci_expander_bridge.c
+++ b/hw/pci-bridge/pci_expander_bridge.c
@@ -12,15 +12,19 @@
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
+#include "hw/i386/pc.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/pci/pci_host.h"
 #include "hw/pci/pcie_host.h"
 #include "hw/pci/pci_bridge.h"
+#include "hw/pci-host/q35.h"
+#include "hw/pci-bridge/pci_expander_bridge.h"
 #include "qemu/range.h"
 #include "qemu/error-report.h"
 #include "sysemu/numa.h"
 #include "qapi/visitor.h"
+#include "qemu/units.h"
 
 #define TYPE_PXB_BUS "pxb-bus"
 #define PXB_BUS(obj) OBJECT_CHECK(PXBBus, (obj), TYPE_PXB_BUS)
@@ -42,11 +46,7 @@ typedef struct PXBBus {
 #define TYPE_PXB_PCIE_DEVICE "pxb-pcie"
 #define PXB_PCIE_DEV(obj) OBJECT_CHECK(PXBDev, (obj), TYPE_PXB_PCIE_DEVICE)
 
-#define PROP_PXB_PCIE_DEV "pxbdev"
-
-#define PROP_PXB_PCIE_DOMAIN_NR "domain_nr"
 #define PROP_PXB_PCIE_MAX_BUS "max_bus"
-#define PROP_PXB_BUS_NR "bus_nr"
 #define PROP_PXB_NUMA_NODE "numa_node"
 
 typedef struct PXBDev {
@@ -122,6 +122,26 @@ static const TypeInfo pxb_pcie_bus_info = {
     .class_init    = pxb_bus_class_init,
 };
 
+static uint64_t pxb_mcfg_hole_size = 0;
+
+static void pxb_pcie_foreach(gpointer data, gpointer user_data)
+{
+    PXBDev *pxb = (PXBDev *)data;
+
+    if (pxb->domain_nr > 0) {
+        /* only reserve what users ask for to reduce memory cost. Plus one
+         * as the interval [bus_nr, max_bus] has (max_bus-bus_nr+1) buses */
+        pxb_mcfg_hole_size += ((pxb->max_bus - pxb->bus_nr + 1ULL) * MiB);
+    }
+}
+
+uint64_t pxb_pcie_mcfg_hole(void)
+{
+    /* foreach is necessary as some pxb still reside in domain 0 */
+    g_list_foreach(pxb_dev_list, pxb_pcie_foreach, NULL);
+    return pxb_mcfg_hole_size;
+}
+
 static const char *pxb_host_root_bus_path(PCIHostState *host_bridge,
                                           PCIBus *rootbus)
 {
@@ -153,14 +173,6 @@ static const char *pxb_pcie_host_root_bus_path(PCIHostState *host_bridge,
     return bus->bus_path;
 }
 
-static void pxb_pcie_host_get_mmcfg_size(Object *obj, Visitor *v, const char *name,
-                                    void *opaque, Error **errp)
-{
-    PCIExpressHost *e = PCIE_HOST_BRIDGE(obj);
-
-    visit_type_uint64(v, name, &e->size, errp);
-}
-
 static char *pxb_host_ofw_unit_address(const SysBusDevice *dev)
 {
     const PCIHostState *pxb_host;
@@ -202,10 +214,6 @@ static void pxb_pcie_host_initfn(Object *obj)
     memory_region_init_io(&phb->data_mem, obj, &pci_host_data_le_ops, phb,
                           "pci-conf-data", 4);
 
-    object_property_add(obj, PCIE_HOST_MCFG_SIZE, "uint64",
-                         pxb_pcie_host_get_mmcfg_size,
-                         NULL, NULL, NULL, NULL);
-
     object_property_add_link(obj, PROP_PXB_PCIE_DEV, TYPE_PXB_PCIE_DEVICE,
                          (Object **)&s->pxbdev,
                          qdev_prop_allow_set_link_before_realize, 0, NULL);
@@ -214,6 +222,7 @@ static void pxb_pcie_host_initfn(Object *obj)
 static Property pxb_pcie_host_props[] = {
     DEFINE_PROP_UINT64(PCIE_HOST_MCFG_BASE, PXBPCIEHost, parent_obj.base_addr,
                         PCIE_BASE_ADDR_UNMAPPED),
+    DEFINE_PROP_UINT64(PCIE_HOST_MCFG_SIZE, PXBPCIEHost, parent_obj.size, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -310,6 +319,8 @@ static gint pxb_compare(gconstpointer a, gconstpointer b)
            0;
 }
 
+static uint64_t pxb_pcie_mcfg_base;
+
 static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp)
 {
     PXBDev *pxb = convert_to_pxb(dev);
@@ -333,7 +344,16 @@ static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp)
         ds = qdev_create(NULL, TYPE_PXB_PCIE_HOST);
 
         object_property_set_link(OBJECT(ds), OBJECT(pxb),
-                                 PROP_PXB_PCIE_DEV, NULL);
+                                 PROP_PXB_PCIE_DEV, errp);
+
+        /* will be overwritten by firmware, but kept for readability */
+        qdev_prop_set_uint64(ds, PCIE_HOST_MCFG_BASE,
+            pxb->domain_nr ? pxb_pcie_mcfg_base : MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT);
+        /* +1 because [bus_nr, max_bus] has (max_bus-bus_nr+1) buses */
+        qdev_prop_set_uint64(ds, PCIE_HOST_MCFG_SIZE,
+            pxb->domain_nr ? (pxb->max_bus - pxb->bus_nr + 1ULL) * MiB : 0);
+        if (pxb->domain_nr)
+            pxb_pcie_mcfg_base += ((pxb->max_bus + 1ULL) * MiB);
 
         bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_PCIE_BUS);
     } else {
@@ -445,6 +465,9 @@ static void pxb_pcie_dev_realize(PCIDevice *dev, Error **errp)
         return;
     }
 
+    if (0 == pxb_pcie_mcfg_base)
+        pxb_pcie_mcfg_base = pc_pci_mcfg_start();
+
     pxb_dev_realize_common(dev, true, errp);
 }
 
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 02f9576..10e4801 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -177,6 +177,8 @@ static Property q35_host_props[] = {
                      mch.below_4g_mem_size, 0),
     DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost,
                      mch.above_4g_mem_size, 0),
+    /* q35 host bridge should always stay in pci domain 0 */
+    DEFINE_PROP_UINT32("domain_nr", Q35PCIHost, domain_nr, 0),
     DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true),
     DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 80bc459..ddc27ba 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -196,7 +196,8 @@ static void pci_del_option_rom(PCIDevice *pdev);
 static uint16_t pci_default_sub_vendor_id = PCI_SUBVENDOR_ID_REDHAT_QUMRANET;
 static uint16_t pci_default_sub_device_id = PCI_SUBDEVICE_ID_QEMU;
 
-static QLIST_HEAD(, PCIHostState) pci_host_bridges;
+static QTAILQ_HEAD(, PCIHostState) pci_host_bridges =
+    QTAILQ_HEAD_INITIALIZER(pci_host_bridges);
 
 int pci_bar(PCIDevice *d, int reg)
 {
@@ -330,7 +331,7 @@ static void pci_host_bus_register(DeviceState *host)
 {
     PCIHostState *host_bridge = PCI_HOST_BRIDGE(host);
 
-    QLIST_INSERT_HEAD(&pci_host_bridges, host_bridge, next);
+    QTAILQ_INSERT_TAIL(&pci_host_bridges, host_bridge, next);
 }
 
 PCIBus *pci_device_root_bus(const PCIDevice *d)
@@ -1798,7 +1799,7 @@ PciInfoList *qmp_query_pci(Error **errp)
     PciInfoList *info, *head = NULL, *cur_item = NULL;
     PCIHostState *host_bridge;
 
-    QLIST_FOREACH(host_bridge, &pci_host_bridges, next) {
+    QTAILQ_FOREACH(host_bridge, &pci_host_bridges, next) {
         info = g_malloc0(sizeof(*info));
         info->value = qmp_query_pci_bus(host_bridge->bus,
                                         pci_bus_num(host_bridge->bus));
@@ -2493,7 +2494,7 @@ int pci_qdev_find_device(const char *id, PCIDevice **pdev)
     PCIHostState *host_bridge;
     int rc = -ENODEV;
 
-    QLIST_FOREACH(host_bridge, &pci_host_bridges, next) {
+    QTAILQ_FOREACH(host_bridge, &pci_host_bridges, next) {
         int tmp = pci_qdev_find_recursive(host_bridge->bus, id, pdev);
         if (!tmp) {
             rc = 0;
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 6894f37..7955ef9 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -209,6 +209,7 @@ void pc_memory_init(PCMachineState *pcms,
                     MemoryRegion *rom_memory,
                     MemoryRegion **ram_memory);
 uint64_t pc_pci_hole64_start(void);
+uint64_t pc_pci_mcfg_start(void);
 qemu_irq pc_allocate_cpu_irq(void);
 DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus);
 void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
diff --git a/include/hw/pci-bridge/pci_expander_bridge.h b/include/hw/pci-bridge/pci_expander_bridge.h
new file mode 100644
index 0000000..870c4cd
--- /dev/null
+++ b/include/hw/pci-bridge/pci_expander_bridge.h
@@ -0,0 +1,11 @@
+#ifndef HW_PCI_EXPANDER_H
+#define HW_PCI_EXPANDER_H
+
+#define PROP_PXB_PCIE_DEV "pxbdev"
+
+#define PROP_PXB_PCIE_DOMAIN_NR "domain_nr"
+#define PROP_PXB_BUS_NR "bus_nr"
+
+uint64_t pxb_pcie_mcfg_hole(void);
+
+#endif
diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h
index 8f4ddde..432e569 100644
--- a/include/hw/pci-host/q35.h
+++ b/include/hw/pci-host/q35.h
@@ -69,6 +69,7 @@ typedef struct Q35PCIHost {
     /*< public >*/
 
     bool pci_hole64_fix;
+    uint32_t domain_nr;
     MCHPCIState mch;
 } Q35PCIHost;
 
diff --git a/include/hw/pci/pci_host.h b/include/hw/pci/pci_host.h
index ba31595..a5617cf 100644
--- a/include/hw/pci/pci_host.h
+++ b/include/hw/pci/pci_host.h
@@ -47,7 +47,7 @@ struct PCIHostState {
     uint32_t config_reg;
     PCIBus *bus;
 
-    QLIST_ENTRY(PCIHostState) next;
+    QTAILQ_ENTRY(PCIHostState) next;
 };
 
 typedef struct PCIHostBridgeClass {
-- 
2.7.4


Reply via email to