[Qemu-devel] [RFC PATCH 26/30] xen/pt: add fixed-size PCIe Extended Capabilities descriptors

2018-03-12 Thread Alexey Gerasimenko
This adds description structures for all fixed-size PCIe Extended
Capabilities.

For every capability register group, only 2 registers are currently
emulated: Capability ID (16 bit) and Next Capability Offset/Version (16
bit). Both are needed to implement selective capability hiding. All other
registers are passed through at the moment (unless they belong to
a "hardwired" capability, which is hidden).

Signed-off-by: Alexey Gerasimenko <x19...@gmail.com>
---
 hw/xen/xen_pt_config_init.c | 183 
 1 file changed, 183 insertions(+)

diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index 6e99b9ebd7..42296c08cc 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -1734,6 +1734,37 @@ static XenPTRegInfo xen_pt_ext_cap_emu_reg_vendor[] = {
 };
 
 
+/* Common reg static information table for all passthru-type
+ * PCIe Extended Capabilities. Only Extended Cap ID and
+ * Next pointer are handled (to support capability hiding).
+ */
+static XenPTRegInfo xen_pt_ext_cap_emu_reg_dummy[] = {
+{
+.offset = XEN_PCIE_CAP_ID,
+.size   = 2,
+.init_val   = 0x,
+.ro_mask= 0x,
+.emu_mask   = 0x,
+.init   = xen_pt_ext_cap_capid_reg_init,
+.u.w.read   = xen_pt_word_reg_read,
+.u.w.write  = xen_pt_word_reg_write,
+},
+{
+.offset = XEN_PCIE_CAP_LIST_NEXT,
+.size   = 2,
+.init_val   = 0x,
+.ro_mask= 0x,
+.emu_mask   = 0x,
+.init   = xen_pt_ext_cap_ptr_reg_init,
+.u.w.read   = xen_pt_word_reg_read,
+.u.w.write  = xen_pt_word_reg_write,
+},
+{
+.size = 0,
+},
+};
+
+
 /
  * Capabilities
  */
@@ -2009,6 +2040,158 @@ static const XenPTRegGroupInfo xen_pt_emu_reg_grps[] = {
 .size_init   = xen_pt_ext_cap_vendor_size_init,
 .emu_regs= xen_pt_ext_cap_emu_reg_vendor,
 },
+/* Device Serial Number Extended Capability reg group */
+{
+.grp_id = PCIE_EXT_CAP_ID(PCI_EXT_CAP_ID_DSN),
+.grp_type   = XEN_PT_GRP_TYPE_EMU,
+.grp_size   = PCI_EXT_CAP_DSN_SIZEOF,   /*0x0C*/
+.size_init  = xen_pt_reg_grp_size_init,
+.emu_regs   = xen_pt_ext_cap_emu_reg_dummy,
+},
+/* Power Budgeting Extended Capability reg group */
+{
+.grp_id = PCIE_EXT_CAP_ID(PCI_EXT_CAP_ID_PWR),
+.grp_type   = XEN_PT_GRP_TYPE_EMU,
+.grp_size   = PCI_EXT_CAP_PWR_SIZEOF,   /*0x10*/
+.size_init  = xen_pt_reg_grp_size_init,
+.emu_regs   = xen_pt_ext_cap_emu_reg_dummy,
+},
+/* Root Complex Internal Link Control Extended Capability reg group */
+{
+.grp_id = PCIE_EXT_CAP_ID(PCI_EXT_CAP_ID_RCILC),
+.grp_type   = XEN_PT_GRP_TYPE_EMU,
+.grp_size   = 0x0C,
+.size_init  = xen_pt_reg_grp_size_init,
+.emu_regs   = xen_pt_ext_cap_emu_reg_dummy,
+},
+/* Root Complex Event Collector Extended Capability reg group */
+{
+.grp_id = PCIE_EXT_CAP_ID(PCI_EXT_CAP_ID_RCEC),
+.grp_type   = XEN_PT_GRP_TYPE_EMU,
+.grp_size   = 0x08,
+.size_init  = xen_pt_reg_grp_size_init,
+.emu_regs   = xen_pt_ext_cap_emu_reg_dummy,
+},
+/* Root Complex Register Block Extended Capability reg group */
+{
+.grp_id = PCIE_EXT_CAP_ID(PCI_EXT_CAP_ID_RCRB),
+.grp_type   = XEN_PT_GRP_TYPE_EMU,
+.grp_size   = 0x14,
+.size_init  = xen_pt_reg_grp_size_init,
+.emu_regs   = xen_pt_ext_cap_emu_reg_dummy,
+},
+/* Configuration Access Correlation Extended Capability reg group */
+{
+.grp_id = PCIE_EXT_CAP_ID(PCI_EXT_CAP_ID_CAC),
+.grp_type   = XEN_PT_GRP_TYPE_EMU,
+.grp_size   = 0x08,
+.size_init  = xen_pt_reg_grp_size_init,
+.emu_regs   = xen_pt_ext_cap_emu_reg_dummy,
+},
+/* Alternate Routing ID Extended Capability reg group */
+{
+.grp_id = PCIE_EXT_CAP_ID(PCI_EXT_CAP_ID_ARI),
+.grp_type   = XEN_PT_GRP_TYPE_EMU,
+.grp_size   = PCI_EXT_CAP_ARI_SIZEOF,
+.size_init  = xen_pt_reg_grp_size_init,
+.emu_regs   = xen_pt_ext_cap_emu_reg_dummy,
+},
+/* Address Translation Services Extended Capability reg group */
+{
+.grp_id = PCIE_EXT_CAP_ID(PCI_EXT_CAP_ID_ATS),
+.grp_type   = XEN_PT_GRP_TYPE_EMU,
+.grp_size   = PCI_EXT_CAP_ATS_SIZEOF,
+.size_init  = xen_pt_reg_grp_size_init,
+.emu_regs   = xen_pt_ext_cap_emu_reg_dummy,
+},
+/* Single Root I/O Virtualization Extended Capability reg group */
+{
+.grp_id = PCIE_EXT_CAP_ID(PCI_EXT_CAP_ID_SRIOV),
+.grp_type   = XEN_PT_GRP_TYPE_EMU,
+.grp_size   = PCI_EXT_CAP_SRIOV_SIZEOF,
+.size_init  = xen_pt_reg_grp_size_init,
+.emu_regs   = xen_pt_ext_c

[Qemu-devel] [RFC PATCH 27/30] xen/pt: add AER PCIe Extended Capability descriptor and sizing

2018-03-12 Thread Alexey Gerasimenko
The patch provides Advanced Error Reporting PCIe Extended Capability
description structure and corresponding capability sizing function.

Signed-off-by: Alexey Gerasimenko <x19...@gmail.com>
---
 hw/xen/xen_pt_config_init.c | 72 +
 1 file changed, 72 insertions(+)

diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index 42296c08cc..98aae3daca 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -1924,6 +1924,70 @@ static int xen_pt_msix_size_init(XenPCIPassthroughState 
*s,
 return 0;
 }
 
+/* get Advanced Error Reporting Extended Capability register group size */
+#define PCI_ERR_CAP_TLP_PREFIX_LOG  (1U << 11)
+#define PCI_DEVCAP2_END_END_TLP_PREFIX  (1U << 21)
+static int xen_pt_ext_cap_aer_size_init(XenPCIPassthroughState *s,
+const XenPTRegGroupInfo *grp_reg,
+uint32_t base_offset,
+uint32_t *size)
+{
+uint8_t dev_type = get_pcie_device_type(s);
+uint32_t aer_caps = 0;
+uint32_t sz = 0;
+int pcie_cap_pos;
+uint32_t devcaps2;
+int ret = 0;
+
+pcie_cap_pos = xen_host_pci_find_next_cap(>real_device, 0,
+  PCI_CAP_ID_EXP);
+if (!pcie_cap_pos) {
+XEN_PT_ERR(>dev,
+   "Cannot find a required PCI Express Capability\n");
+return -1;
+}
+
+if (get_pcie_capability_version(s) > 1) {
+ret = xen_host_pci_get_long(>real_device,
+pcie_cap_pos + PCI_EXP_DEVCAP2,
+);
+if (ret) {
+XEN_PT_ERR(>dev, "Error while reading Device "
+   "Capabilities 2 Register \n");
+return -1;
+}
+}
+
+if (devcaps2 & PCI_DEVCAP2_END_END_TLP_PREFIX) {
+ret = xen_host_pci_get_long(>real_device,
+base_offset + PCI_ERR_CAP,
+_caps);
+if (ret) {
+XEN_PT_ERR(>dev,
+   "Error while reading AER Extended Capability\n");
+return -1;
+}
+
+if (aer_caps & PCI_ERR_CAP_TLP_PREFIX_LOG) {
+sz = 0x48;
+}
+}
+
+if (!sz) {
+if (dev_type == PCI_EXP_TYPE_ROOT_PORT ||
+dev_type == PCI_EXP_TYPE_RC_EC) {
+sz = 0x38;
+} else {
+sz = 0x2C;
+}
+}
+
+*size = sz;
+
+log_pcie_extended_cap(s, "AER", base_offset, *size);
+return ret;
+}
+
 
 static const XenPTRegGroupInfo xen_pt_emu_reg_grps[] = {
 /* Header Type0 reg group */
@@ -2192,6 +2256,14 @@ static const XenPTRegGroupInfo xen_pt_emu_reg_grps[] = {
 .size_init  = xen_pt_reg_grp_size_init,
 .emu_regs   = xen_pt_ext_cap_emu_reg_dummy,
 },
+/* Advanced Error Reporting Extended Capability reg group */
+{
+.grp_id = PCIE_EXT_CAP_ID(PCI_EXT_CAP_ID_ERR),
+.grp_type   = XEN_PT_GRP_TYPE_EMU,
+.grp_size   = 0xFF,
+.size_init  = xen_pt_ext_cap_aer_size_init,
+.emu_regs   = xen_pt_ext_cap_emu_reg_dummy,
+},
 {
 .grp_size = 0,
 },
-- 
2.11.0




[Qemu-devel] [RFC PATCH 23/30] xen/pt: handle PCIe Extended Capabilities Next register

2018-03-12 Thread Alexey Gerasimenko
The patch adds a new xen_pt_ext_cap_ptr_reg_init function which is used
to initialize the emulated Next PCIe Extended Capability pointer.

The primary purpose of this function is to provide a way to selectively
hide some extended capabilities from the capability linked list, skipping
them by altering the Next Capability pointer value.

Signed-off-by: Alexey Gerasimenko <x19...@gmail.com>
---
 hw/xen/xen_pt_config_init.c | 73 +++--
 1 file changed, 71 insertions(+), 2 deletions(-)

diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index 9c041fa288..0ce2a033f9 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -23,11 +23,14 @@
 
 #define XEN_PT_INVALID_REG  0x  /* invalid register value 
*/
 
-/* prototype */
+/* prototypes */
 
 static int xen_pt_ptr_reg_init(XenPCIPassthroughState *s, XenPTRegInfo *reg,
uint32_t real_offset, uint32_t *data);
-
+static int xen_pt_ext_cap_ptr_reg_init(XenPCIPassthroughState *s,
+   XenPTRegInfo *reg,
+   uint32_t real_offset,
+   uint32_t *data);
 
 /* helper */
 
@@ -1932,6 +1935,72 @@ out:
 return 0;
 }
 
+#define PCIE_EXT_CAP_NEXT_SHIFT 4
+#define PCIE_EXT_CAP_VER_MASK   0xF
+
+static int xen_pt_ext_cap_ptr_reg_init(XenPCIPassthroughState *s,
+   XenPTRegInfo *reg,
+   uint32_t real_offset,
+   uint32_t *data)
+{
+int i, rc;
+XenHostPCIDevice *d = >real_device;
+uint16_t reg_field;
+uint16_t cur_offset, version, cap_id;
+uint32_t header;
+
+if (real_offset < PCI_CONFIG_SPACE_SIZE) {
+XEN_PT_ERR(>dev, "Incorrect PCIe extended capability offset"
+   "encountered: 0x%04x\n", real_offset);
+return -EINVAL;
+}
+
+rc = xen_host_pci_get_word(d, real_offset, _field);
+if (rc)
+return rc;
+
+/* preserve version field */
+version= reg_field & PCIE_EXT_CAP_VER_MASK;
+cur_offset = reg_field >> PCIE_EXT_CAP_NEXT_SHIFT;
+
+while (cur_offset && cur_offset != 0xFFF) {
+rc = xen_host_pci_get_long(d, cur_offset, );
+if (rc) {
+XEN_PT_ERR(>dev, "Failed to read PCIe extended capability "
+   "@0x%x (rc:%d)\n", cur_offset, rc);
+return rc;
+}
+
+cap_id = PCI_EXT_CAP_ID(header);
+
+for (i = 0; xen_pt_emu_reg_grps[i].grp_size != 0; i++) {
+uint32_t cur_grp_id = xen_pt_emu_reg_grps[i].grp_id;
+
+if (!IS_PCIE_EXT_CAP_ID(cur_grp_id))
+continue;
+
+if (xen_pt_hide_dev_cap(d, cur_grp_id))
+continue;
+
+if (GET_PCIE_EXT_CAP_ID(cur_grp_id) == cap_id) {
+if (xen_pt_emu_reg_grps[i].grp_type == XEN_PT_GRP_TYPE_EMU)
+goto out;
+
+/* skip TYPE_HARDWIRED capability, move the ptr to next one */
+break;
+}
+}
+
+/* next capability */
+cur_offset = PCI_EXT_CAP_NEXT(header);
+}
+
+out:
+*data = (cur_offset << PCIE_EXT_CAP_NEXT_SHIFT) | version;
+return 0;
+}
+
+
 
 /*
  * Main
-- 
2.11.0




[Qemu-devel] [RFC PATCH 30/30] xen/pt: add VC/VC9/MFVC PCIe Extended Capabilities descriptors and sizing

2018-03-12 Thread Alexey Gerasimenko
Virtual Channel/MFVC capabilities are relatively useless for emulation
(passing through accesses to them should be enough in most cases), yet they
have the hardest format of all PCIe Extended Capabilities, mostly because
the VC capability format allows a sparse config space layout with gaps
between the parts which make up the VC capability.

We have the main capability body followed by a variable number of entries,
where each entry may additionally reference an arbitration table outside
the main capability body. There are no constraints on these arbitration
table offsets -- in theory, they may reside outside the VC capability range,
anywhere in PCIe extended config space. Also, the size of each arbitration
table is not fixed -- it depends on the current VC/Port Arbitration Select
field value.

To simplify things, this patch assumes that changing the VC/Port Arbitration
Select value (i.e. resizing arbitration tables) does not cause arbitration
table offsets to change. Normally the device must place arbitration tables
considering their maximum size, not the current one. The maximum arbitration
table size depends on the VC/Port Arbitration Capability bitmask -- this is
what is actually used to calculate the arbitration table size.

Signed-off-by: Alexey Gerasimenko <x19...@gmail.com>
---
 hw/xen/xen_pt_config_init.c | 192 
 1 file changed, 192 insertions(+)

diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index b03b071b22..ab9c233d84 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -2177,6 +2177,174 @@ static int 
xen_pt_ext_cap_rebar_size_init(XenPCIPassthroughState *s,
 return ret;
 }
 
+/* get VC/VC9/MFVC Extended Capability register group size */
+static uint32_t get_arb_table_len_max(XenPCIPassthroughState *s,
+  uint32_t max_bit_supported,
+  uint32_t arb_cap)
+{
+int n_bit;
+uint32_t table_max_size = 0;
+
+if (!arb_cap) {
+return 0;
+}
+
+for (n_bit = 7; n_bit >= 0 && !(arb_cap & (1 << n_bit)); n_bit--);
+
+if (n_bit > max_bit_supported) {
+XEN_PT_ERR(>dev, "Warning: encountered unknown VC arbitration "
+   "capability supported: 0x%02x\n", (uint8_t) arb_cap);
+}
+
+switch (n_bit) {
+case 0: break;
+case 1: return 32;
+case 2: return 64;
+case 3: /*128 too*/
+case 4: return 128;
+default:
+table_max_size = 8 << n_bit;
+}
+
+return table_max_size;
+}
+
+#define GET_ARB_TABLE_OFFSET(x)   (((x) >> 24) * 0x10)
+#define GET_VC_ARB_CAPABILITY(x)  ((x) & 0xFF)
+#define ARB_TABLE_ENTRY_SIZE_BITS(x)  (1 << (((x) & PCI_VC_CAP1_ARB_SIZE)\
+  >> 10))
+static int xen_pt_ext_cap_vchan_size_init(XenPCIPassthroughState *s,
+  const XenPTRegGroupInfo *grp_reg,
+  uint32_t base_offset,
+  uint32_t *size)
+{
+uint32_t header;
+uint32_t vc_cap_max_size = PCIE_CONFIG_SPACE_SIZE - base_offset;
+uint32_t next_ptr;
+uint32_t arb_table_start_max = 0, arb_table_end_max = 0;
+uint32_t port_vc_cap1, port_vc_cap2, vc_rsrc_cap;
+uint32_t ext_vc_count = 0;
+uint32_t arb_table_entry_size;  /* in bits */
+const char *cap_name;
+int ret;
+int i;
+
+ret = xen_host_pci_get_long(>real_device, base_offset, );
+if (ret) {
+goto err_read;
+}
+
+next_ptr = PCI_EXT_CAP_NEXT(header);
+
+switch (PCI_EXT_CAP_ID(header)) {
+case PCI_EXT_CAP_ID_VC:
+case PCI_EXT_CAP_ID_VC9:
+cap_name = "Virtual Channel";
+break;
+case PCI_EXT_CAP_ID_MFVC:
+cap_name = "Multi-Function VC";
+break;
+default:
+XEN_PT_ERR(>dev, "Unknown VC Extended Capability ID "
+   "encountered: 0x%04x\n", PCI_EXT_CAP_ID(header));
+return -1;
+}
+
+if (next_ptr && next_ptr > base_offset) {
+vc_cap_max_size = next_ptr - base_offset;
+}
+
+ret = xen_host_pci_get_long(>real_device,
+base_offset + PCI_VC_PORT_CAP1,
+_vc_cap1);
+if (ret) {
+goto err_read;
+}
+
+ret = xen_host_pci_get_long(>real_device,
+base_offset + PCI_VC_PORT_CAP2,
+_vc_cap2);
+if (ret) {
+goto err_read;
+}
+
+ext_vc_count = port_vc_cap1 & PCI_VC_CAP1_EVCC;
+
+arb_table_start_max = GET_ARB_TABLE_OFFSET(port_vc_cap2);
+
+/* check arbitration table offset for validity */
+if (arb_table_start_max >= vc_cap_max_size) {
+XEN_PT_ERR(>dev, "Warning: VC arbitration table offset points "
+  

[Qemu-devel] [RFC PATCH 28/30] xen/pt: add descriptors and size calculation for RCLD/ACS/PMUX/DPA/MCAST/TPH/DPC PCIe Extended Capabilities

2018-03-12 Thread Alexey Gerasimenko
Add a few more PCIe Extended Capabilities entries to the
xen_pt_emu_reg_grps[] array along with their corresponding *_size_init()
functions.

All these capabilities have a non-fixed size, but their size calculation
is very simple, so they are added in a single batch.

For every capability register group, only 2 registers are currently
emulated: Capability ID (16 bit) and Next Capability Offset/Version (16
bit). Both are needed to implement the selective capability hiding. All other
registers are passed through at the moment (unless they belong to
a capability marked as "hardwired", which is hidden).

Signed-off-by: Alexey Gerasimenko <x19...@gmail.com>
---
 hw/xen/xen_pt_config_init.c | 224 
 1 file changed, 224 insertions(+)

diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index 98aae3daca..326f5671ff 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -1988,6 +1988,174 @@ static int 
xen_pt_ext_cap_aer_size_init(XenPCIPassthroughState *s,
 return ret;
 }
 
+/* get Root Complex Link Declaration Extended Capability register group size */
+#define RCLD_GET_NUM_ENTRIES(x) (((x) >> 8) & 0xFF)
+static int xen_pt_ext_cap_rcld_size_init(XenPCIPassthroughState *s,
+ const XenPTRegGroupInfo *grp_reg,
+ uint32_t base_offset,
+ uint32_t *size)
+{
+uint32_t elem_self_descr = 0;
+
+int ret = xen_host_pci_get_long(>real_device,
+base_offset + 4,
+_self_descr);
+
+*size = 0x10 + RCLD_GET_NUM_ENTRIES(elem_self_descr) * 0x10;
+
+log_pcie_extended_cap(s, "Root Complex Link Declaration",
+  base_offset, *size);
+return ret;
+}
+
+/* get Access Control Services Extended Capability register group size */
+#define ACS_VECTOR_SIZE_BITS(x)x) >> 8) & 0xFF) ?: 256)
+static int xen_pt_ext_cap_acs_size_init(XenPCIPassthroughState *s,
+const XenPTRegGroupInfo *grp_reg,
+uint32_t base_offset,
+uint32_t *size)
+{
+uint16_t acs_caps = 0;
+
+int ret = xen_host_pci_get_word(>real_device,
+base_offset + PCI_ACS_CAP,
+_caps);
+
+if (acs_caps & PCI_ACS_EC) {
+uint32_t vector_sz = ACS_VECTOR_SIZE_BITS(acs_caps);
+
+*size = PCI_ACS_EGRESS_CTL_V + ((vector_sz + 7) & ~7) / 8;
+} else {
+*size = PCI_ACS_EGRESS_CTL_V;
+}
+
+log_pcie_extended_cap(s, "ACS", base_offset, *size);
+return ret;
+}
+
+/* get Multicast Extended Capability register group size */
+static int xen_pt_ext_cap_multicast_size_init(XenPCIPassthroughState *s,
+  const XenPTRegGroupInfo *grp_reg,
+  uint32_t base_offset,
+  uint32_t *size)
+{
+uint8_t dev_type = get_pcie_device_type(s);
+
+switch (dev_type) {
+case PCI_EXP_TYPE_ENDPOINT:
+case PCI_EXP_TYPE_LEG_END:
+case PCI_EXP_TYPE_RC_END:
+case PCI_EXP_TYPE_RC_EC:
+default:
+*size = PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF;
+break;
+
+case PCI_EXP_TYPE_ROOT_PORT:
+case PCI_EXP_TYPE_UPSTREAM:
+case PCI_EXP_TYPE_DOWNSTREAM:
+*size = 0x30;
+break;
+}
+
+log_pcie_extended_cap(s, "Multicast", base_offset, *size);
+return 0;
+}
+
+/* get Dynamic Power Allocation Extended Capability register group size */
+static int xen_pt_ext_cap_dpa_size_init(XenPCIPassthroughState *s,
+const XenPTRegGroupInfo *grp_reg,
+uint32_t base_offset,
+uint32_t *size)
+{
+uint32_t dpa_caps = 0;
+uint32_t num_entries;
+
+int ret = xen_host_pci_get_long(>real_device,
+base_offset + PCI_DPA_CAP,
+_caps);
+
+num_entries = (dpa_caps & PCI_DPA_CAP_SUBSTATE_MASK) + 1;
+
+*size = PCI_DPA_BASE_SIZEOF + num_entries /*byte-size registers*/;
+
+log_pcie_extended_cap(s, "Dynamic Power Allocation", base_offset, *size);
+return ret;
+}
+
+/* get TPH Requester Extended Capability register group size */
+static int xen_pt_ext_cap_tph_size_init(XenPCIPassthroughState *s,
+const XenPTRegGroupInfo *grp_reg,
+uint32_t base_offset,
+uint32_t *size)
+{
+uint32_t tph_caps = 0;
+uint32_t num_entries;
+
+int ret = xen_host_pci_get_long(>real_device,
+ 

[Qemu-devel] [RFC PATCH 21/30] xen/pt: Xen PCIe passthrough support for Q35: bypass PCIe topology check

2018-03-12 Thread Alexey Gerasimenko
responding physical upstream PCIe
  Switch/RootPort. This will require some interaction with Dom0, hopefully
  extending xen-pciback will be enough.

3) The concept of I/O and MMIO ranges nesting, for tasks like sizing MMIO
  hole or PCI BAR allocation. This one should be pretty simple.

The actual implementation is still a matter for discussion, of course.

In the meantime, a very simple workaround can be used to bypass the pci.sys
PCIe topology check -- there exists one good exception to the "must have
upstream PCIe parent" rule of pci.sys: chipset-integrated devices. How can
pci.sys tell if it deals with a chipset built-in device? It checks one of the
PCI Express Capability fields in the device PCI config space. For chipset
built-in devices this field will state "root complex integrated device",
while in our case, for a normal passed-through PCIe device, there will be
a "PCIe endpoint" type. So that's what the workaround does -- it intercepts
reading of this particular field for passed-through devices and returns the
"root complex integrated device" value for PCIe endpoints. This makes
pci.sys happy and allows Windows 7 and above to use a PT device on
a PCIe-capable system normally. So far no negative side effects have been
encountered while using this approach, so it's a good temporary solution
until multiple PCI bus support is added to Xen.

Signed-off-by: Alexey Gerasimenko <x19...@gmail.com>
---
 hw/xen/xen_pt_config_init.c | 60 +
 1 file changed, 60 insertions(+)

diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index 02e8c97f3c..91de215407 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -902,6 +902,55 @@ static int 
xen_pt_linkctrl2_reg_init(XenPCIPassthroughState *s,
 *data = reg_field;
 return 0;
 }
+/* initialize PCI Express Capabilities register */
+static int xen_pt_pcie_capabilities_reg_init(XenPCIPassthroughState *s,
+ XenPTRegInfo *reg,
+ uint32_t real_offset,
+ uint32_t *data)
+{
+uint8_t dev_type = get_pcie_device_type(s);
+uint16_t reg_field;
+
+if (xen_host_pci_get_word(>real_device,
+ real_offset - reg->offset + PCI_EXP_FLAGS,
+ _field)) {
+XEN_PT_ERR(>dev, "Error reading PCIe Capabilities reg\n");
+*data = 0;
+return 0;
+}
+
+/*
+ * Q35 workaround for Win7+ pci.sys PCIe topology check.
+ * As our PT device currently located on a bus 0, fake the
+ * device/port type field to the "Root Complex integrated device"
+ * value to bypass the check
+ */
+switch (dev_type) {
+case PCI_EXP_TYPE_ENDPOINT:
+case PCI_EXP_TYPE_LEG_END:
+XEN_PT_LOG(>dev, "Original PCIe Capabilities reg is 0x%04X\n",
+reg_field);
+reg_field &= ~PCI_EXP_FLAGS_TYPE;
+reg_field |= ((PCI_EXP_TYPE_RC_END /*9*/ << 4) & PCI_EXP_FLAGS_TYPE);
+XEN_PT_LOG(>dev, "Q35 PCIe topology check workaround: "
+   "faking Capabilities reg to 0x%04X\n", reg_field);
+break;
+
+case PCI_EXP_TYPE_ROOT_PORT:
+case PCI_EXP_TYPE_UPSTREAM:
+case PCI_EXP_TYPE_DOWNSTREAM:
+case PCI_EXP_TYPE_PCI_BRIDGE:
+case PCI_EXP_TYPE_PCIE_BRIDGE:
+case PCI_EXP_TYPE_RC_END:
+case PCI_EXP_TYPE_RC_EC:
+default:
+/* do nothing, return as is */
+break;
+}
+
+*data = reg_field;
+return 0;
+}
 
 /* PCI Express Capability Structure reg static information table */
 static XenPTRegInfo xen_pt_emu_reg_pcie[] = {
@@ -916,6 +965,17 @@ static XenPTRegInfo xen_pt_emu_reg_pcie[] = {
 .u.b.read   = xen_pt_byte_reg_read,
 .u.b.write  = xen_pt_byte_reg_write,
 },
+/* PCI Express Capabilities Register */
+{
+.offset = PCI_EXP_FLAGS,
+.size   = 2,
+.init_val   = 0x,
+.ro_mask= 0x,
+.emu_mask   = 0x,
+.init   = xen_pt_pcie_capabilities_reg_init,
+.u.w.read   = xen_pt_word_reg_read,
+.u.w.write  = xen_pt_word_reg_write,
+},
 /* Device Capabilities reg */
 {
 .offset = PCI_EXP_DEVCAP,
-- 
2.11.0




[Qemu-devel] [RFC PATCH 24/30] xen/pt: allow to hide PCIe Extended Capabilities

2018-03-12 Thread Alexey Gerasimenko
We need to hide some unwanted PCI/PCIe capabilities for passed through
devices.
Normally we do this by marking the capability register group
as XEN_PT_GRP_TYPE_HARDWIRED, which excludes this capability from the
capability list and returns zeroes on attempts to read the capability body.
Skipping the capability in the linked list of capabilities can be done
by changing the Next Capability register to skip one or many unwanted
capabilities.

One difference between PCI and PCIe Extended capabilities is that we don't
have the list head field anymore. PCIe Extended capabilities always start
at offset 0x100 if they're present. Unfortunately, there are typically
only a few PCIe extended capabilities present, which means there is a chance
that some capability we want to hide will reside at offset 0x100 in PCIe
config space.

The simplest way to hide such capabilities from the guest OS or drivers
is to fake their capability ID value.

This patch adds the Capability ID register handler which checks
- if the capability to which this register belongs starts at offset 0x100
  in PCIe config space
- if this capability is marked as XEN_PT_GRP_TYPE_HARDWIRED

If this is the case, then a fake Capability ID value is returned.

Signed-off-by: Alexey Gerasimenko <x19...@gmail.com>
---
 hw/xen/xen_pt.c | 11 +++-
 hw/xen/xen_pt.h |  5 
 hw/xen/xen_pt_config_init.c | 62 -
 3 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index bf098c26b3..e6a18afa83 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -154,7 +154,16 @@ static uint32_t xen_pt_pci_read_config(PCIDevice *d, 
uint32_t addr, int len)
 reg_grp_entry = xen_pt_find_reg_grp(s, addr);
 if (reg_grp_entry) {
 /* check 0-Hardwired register group */
-if (reg_grp_entry->reg_grp->grp_type == XEN_PT_GRP_TYPE_HARDWIRED) {
+if (reg_grp_entry->reg_grp->grp_type == XEN_PT_GRP_TYPE_HARDWIRED &&
+/*
+ * For PCIe Extended Capabilities we need to emulate
+ * CapabilityID and NextCapability/Version registers for a
+ * hardwired reg group located at the offset 0x100 in PCIe
+ * config space. This allows us to hide the first extended
+ * capability as well.
+ */
+!(reg_grp_entry->base_offset == PCI_CONFIG_SPACE_SIZE &&
+ranges_overlap(addr, len, 0x100, 4))) {
 /* no need to emulate, just return 0 */
 val = 0;
 goto exit;
diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h
index 5531347ab2..ac45261679 100644
--- a/hw/xen/xen_pt.h
+++ b/hw/xen/xen_pt.h
@@ -78,6 +78,11 @@ typedef int (*xen_pt_conf_byte_read)
 
 #define XEN_PCI_INTEL_OPREGION 0xfc
 
+#define XEN_PCIE_CAP_ID 0
+#define XEN_PCIE_CAP_LIST_NEXT  2
+
+#define XEN_PCIE_FAKE_CAP_ID_BASE   0xFE00
+
 typedef enum {
 XEN_PT_GRP_TYPE_HARDWIRED = 0,  /* 0 Hardwired reg group */
 XEN_PT_GRP_TYPE_EMU,/* emul reg group */
diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index 0ce2a033f9..10f3b67d35 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -31,6 +31,10 @@ static int 
xen_pt_ext_cap_ptr_reg_init(XenPCIPassthroughState *s,
XenPTRegInfo *reg,
uint32_t real_offset,
uint32_t *data);
+static int xen_pt_ext_cap_capid_reg_init(XenPCIPassthroughState *s,
+ XenPTRegInfo *reg,
+ uint32_t real_offset,
+ uint32_t *data);
 
 /* helper */
 
@@ -1630,6 +1634,56 @@ static XenPTRegInfo xen_pt_emu_reg_igd_opregion[] = {
 },
 };
 
+
+/
+ * Emulated registers for
+ * PCIe Extended Capabilities
+ */
+
+static uint16_t fake_cap_id = XEN_PCIE_FAKE_CAP_ID_BASE;
+
+/* PCIe Extended Capability ID reg */
+static int xen_pt_ext_cap_capid_reg_init(XenPCIPassthroughState *s,
+ XenPTRegInfo *reg,
+ uint32_t real_offset,
+ uint32_t *data)
+{
+uint16_t reg_field;
+int rc;
+XenPTRegGroup *reg_grp_entry = NULL;
+
+/* use real device register's value as initial value */
+rc = xen_host_pci_get_word(>real_device, real_offset, _field);
+if (rc) {
+return rc;
+}
+
+reg_grp_entry = xen_pt_find_reg_grp(s, real_offset);
+
+if (reg_grp_entry) {
+if (reg_grp_entry->reg_grp->grp_type == XEN_PT_GRP_TYPE_HARDWIRED &&
+reg_grp_entry->base_offset == PCI_CONFIG_SPACE_SIZE) {
+/*
+ * This is the situation when we were asked to hide (aka
+ * "hardwire to 0") some PCIe ext capability

[Qemu-devel] [RFC PATCH 29/30] xen/pt: add Resizable BAR PCIe Extended Capability descriptor and sizing

2018-03-12 Thread Alexey Gerasimenko
Unlike other PCIe Extended Capabilities, we currently cannot allow attempts
to use the Resizable BAR Capability. Without specifically handling BAR
resizing we're likely to end up with a corrupted MMIO hole layout if the
guest OS attempts to use this feature. In fact, recent Windows versions have
started to understand and use the Resizable BAR Capability (see [1]).

For now, we need to hide the Resizable BAR Capability from the guest OS until
BAR resizing emulation support is implemented in Xen. This support
is pretty much a mandatory to-do feature, as the effect of writing
to Resizable BAR control registers can be considered similar
to reprogramming normal BAR registers -- i.e. this needs to be handled
explicitly, resulting in remapping of the corresponding MMIO BAR range(s).
Until then, we mark the Resizable BAR Capability as
XEN_PT_GRP_TYPE_HARDWIRED.

[1]: 
https://docs.microsoft.com/en-us/windows-hardware/drivers/display/resizable-bar-support

Signed-off-by: Alexey Gerasimenko <x19...@gmail.com>
---
 hw/xen/xen_pt_config_init.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index 326f5671ff..b03b071b22 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -2156,6 +2156,26 @@ static int 
xen_pt_ext_cap_pmux_size_init(XenPCIPassthroughState *s,
 return ret;
 }
 
+/* get Resizable BAR Extended Capability register group size */
+static int xen_pt_ext_cap_rebar_size_init(XenPCIPassthroughState *s,
+  const XenPTRegGroupInfo *grp_reg,
+  uint32_t base_offset,
+  uint32_t *size)
+{
+uint32_t rebar_ctl = 0;
+uint32_t num_entries;
+
+int ret = xen_host_pci_get_long(>real_device,
+base_offset + PCI_REBAR_CTRL,
+_ctl);
+num_entries =
+(rebar_ctl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT;
+
+*size = num_entries*8 + 4;
+
+log_pcie_extended_cap(s, "Resizable BAR", base_offset, *size);
+return ret;
+}
 
 static const XenPTRegGroupInfo xen_pt_emu_reg_grps[] = {
 /* Header Type0 reg group */
@@ -2488,6 +2508,13 @@ static const XenPTRegGroupInfo xen_pt_emu_reg_grps[] = {
 .size_init  = xen_pt_ext_cap_dpc_size_init,
 .emu_regs   = xen_pt_ext_cap_emu_reg_dummy,
 },
+/* Resizable BAR Extended Capability reg group */
+{
+.grp_id = PCIE_EXT_CAP_ID(PCI_EXT_CAP_ID_REBAR),
+.grp_type   = XEN_PT_GRP_TYPE_HARDWIRED,
+.grp_size   = 0xFF,
+.size_init  = xen_pt_ext_cap_rebar_size_init,
+},
 {
 .grp_size = 0,
 },
-- 
2.11.0




[Qemu-devel] [RFC PATCH 25/30] xen/pt: add Vendor-specific PCIe Extended Capability descriptor and sizing

2018-03-12 Thread Alexey Gerasimenko
The patch provides Vendor-specific PCIe Extended Capability description
structure and corresponding sizing function. In this particular case the
size of the Vendor capability is available in the VSEC Length field.

Signed-off-by: Alexey Gerasimenko <x19...@gmail.com>
---
 hw/xen/xen_pt_config_init.c | 77 +++--
 1 file changed, 75 insertions(+), 2 deletions(-)

diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index 10f3b67d35..6e99b9ebd7 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -129,6 +129,18 @@ static uint32_t get_throughable_mask(const 
XenPCIPassthroughState *s,
 return throughable_mask & valid_mask;
 }
 
+static void log_pcie_extended_cap(XenPCIPassthroughState *s,
+  const char *cap_name,
+  uint32_t base_offset, uint32_t size)
+{
+if (size) {
+XEN_PT_LOG(>dev, "Found PCIe Extended Capability: %s at 0x%04x, "
+"size 0x%x bytes\n", cap_name,
+(uint16_t) base_offset, size);
+}
+}
+
+
 /
  * general register functions
  */
@@ -1684,6 +1696,44 @@ static int 
xen_pt_ext_cap_capid_reg_init(XenPCIPassthroughState *s,
 }
 
 
+/* Vendor-specific Ext Capability Structure reg static information table */
+static XenPTRegInfo xen_pt_ext_cap_emu_reg_vendor[] = {
+{
+.offset = XEN_PCIE_CAP_ID,
+.size   = 2,
+.init_val   = 0x,
+.ro_mask= 0x,
+.emu_mask   = 0x,
+.init   = xen_pt_ext_cap_capid_reg_init,
+.u.w.read   = xen_pt_word_reg_read,
+.u.w.write  = xen_pt_word_reg_write,
+},
+{
+.offset = XEN_PCIE_CAP_LIST_NEXT,
+.size   = 2,
+.init_val   = 0x,
+.ro_mask= 0x,
+.emu_mask   = 0x,
+.init   = xen_pt_ext_cap_ptr_reg_init,
+.u.w.read   = xen_pt_word_reg_read,
+.u.w.write  = xen_pt_word_reg_write,
+},
+{
+.offset = PCI_VNDR_HEADER,
+.size   = 4,
+.init_val   = 0x,
+.ro_mask= 0x,
+.emu_mask   = 0x,
+.init   = xen_pt_common_reg_init,
+.u.dw.read  = xen_pt_long_reg_read,
+.u.dw.write = xen_pt_long_reg_write,
+},
+{
+.size = 0,
+},
+};
+
+
 /
  * Capabilities
  */
@@ -1708,6 +1758,23 @@ static int 
xen_pt_vendor_size_init(XenPCIPassthroughState *s,
 *size = sz;
 return ret;
 }
+
+static int xen_pt_ext_cap_vendor_size_init(XenPCIPassthroughState *s,
+   const XenPTRegGroupInfo *grp_reg,
+   uint32_t base_offset,
+   uint32_t *size)
+{
+uint32_t vsec_hdr = 0;
+int ret = xen_host_pci_get_long(>real_device,
+base_offset + PCI_VNDR_HEADER,
+_hdr);
+
+*size = PCI_VNDR_HEADER_LEN(vsec_hdr);
+
+log_pcie_extended_cap(s, "Vendor-specific", base_offset, *size);
+
+return ret;
+}
 /* get PCI Express Capability Structure register group size */
 static int xen_pt_pcie_size_init(XenPCIPassthroughState *s,
  const XenPTRegGroupInfo *grp_reg,
@@ -1934,6 +2001,14 @@ static const XenPTRegGroupInfo xen_pt_emu_reg_grps[] = {
 .size_init   = xen_pt_reg_grp_size_init,
 .emu_regs= xen_pt_emu_reg_igd_opregion,
 },
+/* Vendor-specific Extended Capability reg group */
+{
+.grp_id  = PCIE_EXT_CAP_ID(PCI_EXT_CAP_ID_VNDR),
+.grp_type= XEN_PT_GRP_TYPE_EMU,
+.grp_size= 0xFF,
+.size_init   = xen_pt_ext_cap_vendor_size_init,
+.emu_regs= xen_pt_ext_cap_emu_reg_vendor,
+},
 {
 .grp_size = 0,
 },
@@ -2054,8 +2129,6 @@ out:
 return 0;
 }
 
-
-
 /*
  * Main
  */
-- 
2.11.0




[Qemu-devel] [RFC PATCH 22/30] xen/pt: add support for PCIe Extended Capabilities and larger config space

2018-03-12 Thread Alexey Gerasimenko
This patch provides basic facilities for PCIe Extended Capabilities and
support for controlled (via s->pcie_enabled_dev flag) access to PCIe
config space (>256).

PCIe Extended Capabilities make use of 16-bit capability ID. Also,
a capability size might exceed 8-bit width. So as the very first step
we need to increase type size for grp_id, grp_size, etc -- they were
limited to 8-bit.

The only troublesome issue with PCIe Extended Capability IDs is that their
value range is actually the same as for basic PCI capabilities.
E.g. capability ID 3 means VPD Capability for PCI and at the same time
Device Serial Number Capability for PCIe Extended caps. This adds a bit of
inconvenience.

In order to distinguish between two sets of same capability IDs, the patch
introduces a set of macros to mark a capability ID as PCIe Extended one
(or check if it is basic/extended + get a raw ID value):
- PCIE_EXT_CAP_ID(cap_id)
- IS_PCIE_EXT_CAP_ID(grp_id)
- GET_PCIE_EXT_CAP_ID(grp_id)

Here is how it's used:
/* Intel IGD Opregion group */
{
.grp_id  = XEN_PCI_INTEL_OPREGION,  /* no change */
.grp_type= XEN_PT_GRP_TYPE_EMU,
.grp_size= 0x4,
.size_init   = xen_pt_reg_grp_size_init,
.emu_regs= xen_pt_emu_reg_igd_opregion,
},
/* Vendor-specific Extended Capability reg group */
{
.grp_id  = PCIE_EXT_CAP_ID(PCI_EXT_CAP_ID_VNDR),
.grp_type= XEN_PT_GRP_TYPE_EMU,
.grp_size= 0xFF,
.size_init   = xen_pt_ext_cap_vendor_size_init,
.emu_regs= xen_pt_ext_cap_emu_reg_vendor,
},
By using the PCIE_EXT_CAP_ID() macro it is possible to reuse existing
header files with already defined PCIe Extended Capability ID values.

find_cap_offset() receives a capability ID and checks if it's an Extended one
by using the IS_PCIE_EXT_CAP_ID(cap) macro, passing the real capability
ID value to either xen_host_pci_find_next_ext_cap
or xen_host_pci_find_next_cap.

Signed-off-by: Alexey Gerasimenko <x19...@gmail.com>
---
 hw/xen/xen_pt.c |  14 +-
 hw/xen/xen_pt.h |  13 +++--
 hw/xen/xen_pt_config_init.c | 113 +---
 3 files changed, 74 insertions(+), 66 deletions(-)

diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index a902a9b685..bf098c26b3 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -82,10 +82,20 @@ void xen_pt_log(const PCIDevice *d, const char *f, ...)
 
 /* Config Space */
 
-static int xen_pt_pci_config_access_check(PCIDevice *d, uint32_t addr, int len)
+static int xen_pt_pci_config_access_check(PCIDevice *d,
+  uint32_t addr, int len)
 {
+XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
+
 /* check offset range */
-if (addr > 0xFF) {
+if (s->pcie_enabled_dev) {
+if (addr >= PCIE_CONFIG_SPACE_SIZE) {
+XEN_PT_ERR(d, "Failed to access register with offset "
+  "exceeding 0xFFF. (addr: 0x%02x, len: %d)\n",
+  addr, len);
+return -1;
+}
+} else if (addr >= PCI_CONFIG_SPACE_SIZE) {
 XEN_PT_ERR(d, "Failed to access register with offset exceeding 0xFF. "
"(addr: 0x%02x, len: %d)\n", addr, len);
 return -1;
diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h
index 1204acbdce..5531347ab2 100644
--- a/hw/xen/xen_pt.h
+++ b/hw/xen/xen_pt.h
@@ -31,6 +31,11 @@ void xen_pt_log(const PCIDevice *d, const char *f, ...) 
GCC_FMT_ATTR(2, 3);
 /* Helper */
 #define XEN_PFN(x) ((x) >> XC_PAGE_SHIFT)
 
+/* Macro's for PCIe Extended Capabilities */
+#define PCIE_EXT_CAP_ID(cap_id) ((cap_id) | (1U << 16))
+#define IS_PCIE_EXT_CAP_ID(grp_id)  ((grp_id) & (1U << 16))
+#define GET_PCIE_EXT_CAP_ID(grp_id) ((grp_id) & 0x)
+
 typedef const struct XenPTRegInfo XenPTRegInfo;
 typedef struct XenPTReg XenPTReg;
 
@@ -152,13 +157,13 @@ typedef const struct XenPTRegGroupInfo XenPTRegGroupInfo;
 /* emul reg group size initialize method */
 typedef int (*xen_pt_reg_size_init_fn)
 (XenPCIPassthroughState *, XenPTRegGroupInfo *,
- uint32_t base_offset, uint8_t *size);
+ uint32_t base_offset, uint32_t *size);
 
 /* emulated register group information */
 struct XenPTRegGroupInfo {
-uint8_t grp_id;
+uint32_t grp_id;
 XenPTRegisterGroupType grp_type;
-uint8_t grp_size;
+uint32_t grp_size;
 xen_pt_reg_size_init_fn size_init;
 XenPTRegInfo *emu_regs;
 };
@@ -168,7 +173,7 @@ typedef struct XenPTRegGroup {
 QLIST_ENTRY(XenPTRegGroup) entries;
 XenPTRegGroupInfo *reg_grp;
 uint32_t base_offset;
-uint8_t size;
+uint32_t size;
 QLIST_HEAD(, XenPTReg) reg_tbl_list;
 } XenPTRegGroup;
 
diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index 91de215407..9c041fa288 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -32,29 +32,42 @

[Qemu-devel] [RFC PATCH 18/30] xen/pt: XenHostPCIDevice: provide functions for PCI Capabilities and PCIe Extended Capabilities enumeration

2018-03-12 Thread Alexey Gerasimenko
This patch introduces 2 new functions,
- xen_host_pci_find_next_ext_cap (actually a reworked
  xen_host_pci_find_ext_cap_offset function which is unused)
- xen_host_pci_find_next_cap

These functions allow searching for PCI/PCIe capabilities in a uniform
way. Both functions can search either for a specific capability or for any
capability encountered next (by specifying CAP_ID_ANY as a capability ID) --
this may be useful when we merely need to traverse the capability list
one-by-one. In both functions the 'pos' argument allows continuing the search
from the last position (0 means to start from the beginning).

In order not to probe PCIe Extended Capabilities existence every time,
xen_host_pci_find_next_ext_cap makes use of the new 'has_pcie_ext_caps'
field in XenHostPCIDevice structure which is filled only once (in
xen_host_pci_device_get).

Signed-off-by: Alexey Gerasimenko <x19...@gmail.com>
---
 hw/xen/xen-host-pci-device.c | 95 +---
 hw/xen/xen-host-pci-device.h |  5 ++-
 2 files changed, 85 insertions(+), 15 deletions(-)

diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c
index eed8cc88e3..9d76b199af 100644
--- a/hw/xen/xen-host-pci-device.c
+++ b/hw/xen/xen-host-pci-device.c
@@ -14,6 +14,7 @@
 
 #define XEN_HOST_PCI_MAX_EXT_CAP \
 ((PCIE_CONFIG_SPACE_SIZE - PCI_CONFIG_SPACE_SIZE) / (PCI_CAP_SIZEOF + 4))
+#define XEN_HOST_PCI_CAP_MAX 48
 
 #ifdef XEN_HOST_PCI_DEVICE_DEBUG
 #  define XEN_HOST_PCI_LOG(f, a...) fprintf(stderr, "%s: " f, __func__, ##a)
@@ -199,6 +200,19 @@ static bool xen_host_pci_dev_is_virtfn(XenHostPCIDevice *d)
 return !stat(path, );
 }
 
+static bool xen_host_pci_dev_has_pcie_ext_caps(XenHostPCIDevice *d)
+{
+uint32_t header;
+
+if (xen_host_pci_get_long(d, PCI_CONFIG_SPACE_SIZE, ))
+return false;
+
+if (header == 0 || header == ~0U)
+return false;
+
+return true;
+}
+
 static void xen_host_pci_config_open(XenHostPCIDevice *d, Error **errp)
 {
 char path[PATH_MAX];
@@ -297,37 +311,89 @@ int xen_host_pci_set_block(XenHostPCIDevice *d, int pos, 
uint8_t *buf, int len)
 return xen_host_pci_config_write(d, pos, buf, len);
 }
 
-int xen_host_pci_find_ext_cap_offset(XenHostPCIDevice *d, uint32_t cap)
+int xen_host_pci_find_next_ext_cap(XenHostPCIDevice *d, int pos, uint32_t cap)
 {
 uint32_t header = 0;
 int max_cap = XEN_HOST_PCI_MAX_EXT_CAP;
-int pos = PCI_CONFIG_SPACE_SIZE;
+
+if (!d->has_pcie_ext_caps)
+return 0;
+
+if (!pos) {
+pos = PCI_CONFIG_SPACE_SIZE;
+} else {
+if (xen_host_pci_get_long(d, pos, ))
+return 0;
+
+pos = PCI_EXT_CAP_NEXT(header);
+}
 
 do {
-if (xen_host_pci_get_long(d, pos, )) {
+if (!pos || pos < PCI_CONFIG_SPACE_SIZE)
+break;
+
+if (xen_host_pci_get_long(d, pos, ))
 break;
-}
 /*
  * If we have no capabilities, this is indicated by cap ID,
  * cap version and next pointer all being 0.
+ * Also check for all F's returned (which means PCIe ext conf space
+ * is unreadable for some reason)
  */
-if (header == 0) {
+if (header == 0 || header == ~0U)
 break;
-}
 
-if (PCI_EXT_CAP_ID(header) == cap) {
+if (cap == CAP_ID_ANY)
+return pos;
+else if (PCI_EXT_CAP_ID(header) == cap)
 return pos;
-}
 
 pos = PCI_EXT_CAP_NEXT(header);
-if (pos < PCI_CONFIG_SPACE_SIZE) {
+} while (--max_cap);
+
+return 0;
+}
+
+int xen_host_pci_find_next_cap(XenHostPCIDevice *d, int pos, uint32_t cap)
+{
+uint8_t id;
+unsigned max_cap = XEN_HOST_PCI_CAP_MAX;
+uint8_t status = 0;
+uint8_t curpos;
+
+if (xen_host_pci_get_byte(d, PCI_STATUS, ))
+return 0;
+
+if ((status & PCI_STATUS_CAP_LIST) == 0)
+return 0;
+
+if (pos < PCI_CAPABILITY_LIST) {
+curpos = PCI_CAPABILITY_LIST;
+} else {
+curpos = (uint8_t) pos;
+}
+
+while (max_cap--) {
+if (xen_host_pci_get_byte(d, curpos, ))
+break;
+if (!curpos)
 break;
-}
 
-max_cap--;
-} while (max_cap > 0);
+if (cap == CAP_ID_ANY)
+return curpos;
 
-return -1;
+if (xen_host_pci_get_byte(d, curpos + PCI_CAP_LIST_ID, ))
+break;
+
+if (id == 0xff)
+break;
+else if (id == cap)
+return curpos;
+
+curpos += PCI_CAP_LIST_NEXT;
+}
+
+return 0;
 }
 
 void xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t domain,
@@ -377,7 +443,8 @@ void xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t 
domain,
 }
 d->class_code = v;
 
-d->is_virtfn = xen_host_pci_dev_is_virtfn(d);
+d->is_virtfn = xen_host_pci_dev_is_virtfn(d);
+d->has_pcie_ext_caps = xen_host_pci_dev_has_pcie_ext_caps(d);
 

[Qemu-devel] [RFC PATCH 20/30] xen/pt: determine the legacy/PCIe mode for a passed through device

2018-03-12 Thread Alexey Gerasimenko
Even if we have some real PCIe device being passed through to a guest,
there are situations when we cannot use its PCIe features, primarily
allowing to access extended (>256) config space.

Basically, we can allow reading PCIe extended config space only if both
the device and emulated system are PCIe-capable. So it's a combination
of checks:
- PCI Express capability presence
- pci_is_express(device)
- pci_bus_is_express(device bus)

The AND-product of these checks is stored to pcie_enabled_dev flag
in XenPCIPassthroughState for later use in functions like
xen_pt_pci_config_access_check.

This way we get consistent behavior when the same PCIe device is being passed
through to either an i440 domain or a Q35 one.

Signed-off-by: Alexey Gerasimenko <x19...@gmail.com>
---
 hw/xen/xen_pt.c | 28 ++--
 hw/xen/xen_pt.h |  1 +
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index 9b7a960de1..a902a9b685 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -687,6 +687,21 @@ static const MemoryListener xen_pt_io_listener = {
 .priority = 10,
 };
 
+static inline bool xen_pt_dev_is_pcie_mode(PCIDevice *d)
+{
+XenPCIPassthroughState *s = XEN_PT_DEVICE(d);
+PCIBus *bus = pci_get_bus(d);
+
+if (bus != NULL) {
+if (pci_is_express(d) && pci_bus_is_express(bus) &&
+xen_host_pci_find_next_cap(>real_device, 0, PCI_CAP_ID_EXP)) {
+return true;
+}
+}
+
+return false;
+}
+
 static void
 xen_igd_passthrough_isa_bridge_create(XenPCIPassthroughState *s,
   XenHostPCIDevice *dev)
@@ -794,8 +809,17 @@ static void xen_pt_realize(PCIDevice *d, Error **errp)
s->real_device.dev, s->real_device.func);
 }
 
-/* Initialize virtualized PCI configuration (Extended 256 Bytes) */
-memset(d->config, 0, PCI_CONFIG_SPACE_SIZE);
+s->pcie_enabled_dev = xen_pt_dev_is_pcie_mode(d);
+if (s->pcie_enabled_dev) {
+XEN_PT_LOG(d, "Host device %04x:%02x:%02x.%d passed thru "
+   "in PCIe mode\n", s->real_device.domain,
+s->real_device.bus, s->real_device.dev,
+s->real_device.func);
+}
+
+/* Initialize virtualized PCI configuration space (256/4K bytes) */
+memset(d->config, 0, pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE
+   : PCI_CONFIG_SPACE_SIZE);
 
 s->memory_listener = xen_pt_memory_listener;
 s->io_listener = xen_pt_io_listener;
diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h
index aa39a9aa5f..1204acbdce 100644
--- a/hw/xen/xen_pt.h
+++ b/hw/xen/xen_pt.h
@@ -212,6 +212,7 @@ struct XenPCIPassthroughState {
 
 PCIHostDeviceAddress hostaddr;
 bool is_virtfn;
+bool pcie_enabled_dev;
 bool permissive;
 bool permissive_warned;
 XenHostPCIDevice real_device;
-- 
2.11.0




[Qemu-devel] [RFC PATCH 19/30] xen/pt: avoid reading PCIe device type and cap version multiple times

2018-03-12 Thread Alexey Gerasimenko
xen_pt_config_init.c reads Device/Port Type and Capability version fields
in many places. Two functions are used for this purpose:
get_capability_version and get_device_type. These functions perform PCI
conf space reading every time they're called. Another bad thing is that
these functions know nothing about where the PCI Express Capability is located,
so its offset must be provided explicitly in function arguments. Their
typical usage is like this:
uint8_t cap_ver = get_capability_version(s, real_offset - reg->offset);
uint8_t dev_type = get_device_type(s, real_offset - reg->offset);

To avoid this, the PCI Express Capability register is now read only
once and stored in the XenHostPCIDevice structure (pcie_flags field). The
capability offset parameter is no longer needed, simplifying function
usage. Also, get_device_type and get_capability_version were renamed
to the more descriptive get_pcie_device_type and get_pcie_capability_version.

Signed-off-by: Alexey Gerasimenko <x19...@gmail.com>
---
 hw/xen/xen-host-pci-device.c | 15 +++
 hw/xen/xen-host-pci-device.h |  1 +
 hw/xen/xen_pt_config_init.c  | 34 ++
 3 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c
index 9d76b199af..11e9e26d31 100644
--- a/hw/xen/xen-host-pci-device.c
+++ b/hw/xen/xen-host-pci-device.c
@@ -402,6 +402,7 @@ void xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t 
domain,
 {
 unsigned int v;
 Error *err = NULL;
+int pcie_cap_pos;
 
 d->config_fd = -1;
 d->domain = domain;
@@ -446,6 +447,20 @@ void xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t 
domain,
 d->is_virtfn = xen_host_pci_dev_is_virtfn(d);
 d->has_pcie_ext_caps = xen_host_pci_dev_has_pcie_ext_caps(d);
 
+/* read and store PCIe Capabilities field for later use */
+pcie_cap_pos = xen_host_pci_find_next_cap(d, 0, PCI_CAP_ID_EXP);
+
+if (pcie_cap_pos) {
+if (xen_host_pci_get_word(d, pcie_cap_pos + PCI_EXP_FLAGS,
+  >pcie_flags)) {
+error_setg(, "Unable to read from PCI Express capability "
+   "structure at 0x%x", pcie_cap_pos);
+goto error;
+}
+} else {
+d->pcie_flags = 0x;
+}
+
 return;
 
 error:
diff --git a/hw/xen/xen-host-pci-device.h b/hw/xen/xen-host-pci-device.h
index 37c5614a24..2884c4b4b9 100644
--- a/hw/xen/xen-host-pci-device.h
+++ b/hw/xen/xen-host-pci-device.h
@@ -27,6 +27,7 @@ typedef struct XenHostPCIDevice {
 uint16_t device_id;
 uint32_t class_code;
 int irq;
+uint16_t pcie_flags;
 
 XenHostPCIIORegion io_regions[PCI_NUM_REGIONS - 1];
 XenHostPCIIORegion rom;
diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index a3ce33e78b..02e8c97f3c 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -828,24 +828,18 @@ static XenPTRegInfo xen_pt_emu_reg_vendor[] = {
  * PCI Express Capability
  */
 
-static inline uint8_t get_capability_version(XenPCIPassthroughState *s,
- uint32_t offset)
+static inline uint8_t get_pcie_capability_version(XenPCIPassthroughState *s)
 {
-uint8_t flag;
-if (xen_host_pci_get_byte(>real_device, offset + PCI_EXP_FLAGS, )) 
{
-return 0;
-}
-return flag & PCI_EXP_FLAGS_VERS;
+assert(s->real_device.pcie_flags != 0x);
+
+return (uint8_t) (s->real_device.pcie_flags & PCI_EXP_FLAGS_VERS);
 }
 
-static inline uint8_t get_device_type(XenPCIPassthroughState *s,
-  uint32_t offset)
+static inline uint8_t get_pcie_device_type(XenPCIPassthroughState *s)
 {
-uint8_t flag;
-if (xen_host_pci_get_byte(>real_device, offset + PCI_EXP_FLAGS, )) 
{
-return 0;
-}
-return (flag & PCI_EXP_FLAGS_TYPE) >> 4;
+assert(s->real_device.pcie_flags != 0x);
+
+return (uint8_t) ((s->real_device.pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4);
 }
 
 /* initialize Link Control register */
@@ -853,8 +847,8 @@ static int xen_pt_linkctrl_reg_init(XenPCIPassthroughState 
*s,
 XenPTRegInfo *reg, uint32_t real_offset,
 uint32_t *data)
 {
-uint8_t cap_ver = get_capability_version(s, real_offset - reg->offset);
-uint8_t dev_type = get_device_type(s, real_offset - reg->offset);
+uint8_t cap_ver  = get_pcie_capability_version(s);
+uint8_t dev_type = get_pcie_device_type(s);
 
 /* no need to initialize in case of Root Complex Integrated Endpoint
  * with cap_ver 1.x
@@ -871,7 +865,7 @@ static int xen_pt_devctrl2_reg_init(XenPCIPassthroughState 
*s,
 XenPTRegInfo *reg, uint32_t real_offset,
 uint32_t *data

[Qemu-devel] [RFC PATCH 14/30] pc/q35: Apply PCI bus BSEL property for Xen PCI device hotplug

2018-03-12 Thread Alexey Gerasimenko
On Q35 we still need to assign BSEL property to bus(es) for PCI device
add/hotplug to work.
Extend acpi_set_pci_info() function to support Q35 as well. Previously
it was limited to find_i440fx() call, this patch adds new (trivial)
function find_q35() which returns root PCIBus object on Q35, in a way
similar to what find_i440fx does.

Signed-off-by: Alexey Gerasimenko <x19...@gmail.com>
---
 hw/acpi/pcihp.c  | 6 +-
 hw/pci-host/q35.c| 8 
 include/hw/i386/pc.h | 3 +++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index 91c82fdc7a..f70d8620d7 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -105,7 +105,11 @@ static void acpi_set_pci_info(void)
 }
 bsel_is_set = true;
 
-bus = find_i440fx(); /* TODO: Q35 support */
+bus = find_i440fx();
+if (!bus) {
+bus = find_q35();
+}
+
 if (bus) {
 /* Scan all PCI buses. Set property to enable acpi based hotplug. */
 pci_for_each_bus_depth_first(bus, acpi_set_bsel, NULL, _alloc);
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index a36a1195e4..8c1603fce9 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -258,6 +258,14 @@ static void q35_host_initfn(Object *obj)
 IO_APIC_DEFAULT_ADDRESS - 1);
 }
 
+PCIBus *find_q35(void)
+{
+PCIHostState *s = OBJECT_CHECK(PCIHostState,
+   object_resolve_path("/machine/q35", NULL),
+   TYPE_PCI_HOST_BRIDGE);
+return s ? s->bus : NULL;
+}
+
 static const TypeInfo q35_host_info = {
 .name   = TYPE_Q35_HOST_DEVICE,
 .parent = TYPE_PCIE_HOST_BRIDGE,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index bb49165fe0..96d74b35bd 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -302,6 +302,9 @@ PCIBus *find_i440fx(void);
 extern PCIDevice *piix4_dev;
 int piix4_init(PCIBus *bus, ISABus **isa_bus, int devfn);
 
+/* q35.c */
+PCIBus *find_q35(void);
+
 /* pc_sysfw.c */
 void pc_system_firmware_init(MemoryRegion *rom_memory,
  bool isapc_ram_fw);
-- 
2.11.0




[Qemu-devel] [RFC PATCH 13/30] pc/xen: Xen Q35 support: provide IRQ handling for PCI devices

2018-03-12 Thread Alexey Gerasimenko
The primary difference in PCI device IRQ management between Xen HVM and
QEMU is that Xen PCI IRQs are "device-centric" while QEMU PCI IRQs are
"chipset-centric". Namely, Xen uses PCI device BDF and INTx as coordinates
to assert IRQ while QEMU finds out to which chipset PIRQ the IRQ is routed
through the hierarchy of PCI buses and manages IRQ assertion on chipset
side (as PIRQ inputs).

Two callback functions are used for this purpose: .map_irq and .set_irq
(named after corresponding structure fields). Corresponding Xen-specific
callback functions are piix3_set_irq() and pci_slot_get_pirq(). In Xen
case these functions do not operate on pirq pin numbers. Instead, they use
a specific value to pass BDF/INTx information between .map_irq and
.set_irq -- PCI device devfn and INTx pin number are combined into
pseudo-PIRQ in pci_slot_get_pirq, which piix3_set_irq later decodes back
into devfn and INTx number for passing to *set_pci_intx_level() call.

For Xen on Q35 this scheme is still applicable, with the exception that
function names are non-descriptive now and need to be renamed to show
their common i440/Q35 nature. Proposed new names are:

xen_pci_slot_get_pirq --> xen_cmn_pci_slot_get_pirq
xen_piix3_set_irq --> xen_cmn_set_irq

Another IRQ-related difference between i440 and Q35 is the number of PIRQ
inputs and PIRQ routers (PCI IRQ links in terms of ACPI) available. i440
has 4 PCI interrupt links, while Q35 has 8 (PIRQA...PIRQH).
Currently Xen has support for only 4 PCI links, so we describe only 4 of
8 PCI links in ACPI tables. Also, hvmloader disables PIRQ routing for
PIRQE..PIRQH by writing 80h into corresponding PIRQ[n]_ROUT registers.

All this PCI interrupt routing stuff is largely an ancient legacy from the PIC
era. It's hardly worth extending the number of PCI links supported as we
normally deal with APIC mode and/or MSI interrupts.

The only useful thing to do with PIRQE..PIRQH routing currently is to
check if guest actually attempts to use it for some reason (despite ACPI
PCI routing information provided). In this case, a warning is logged.

Signed-off-by: Alexey Gerasimenko <x19...@gmail.com>
---
 hw/i386/pc_q35.c   | 13 ++---
 hw/i386/xen/xen-hvm.c  | 32 +---
 hw/isa/lpc_ich9.c  |  4 
 hw/pci-host/piix.c |  2 +-
 include/hw/i386/ich9.h |  1 +
 include/hw/xen/xen.h   |  5 +++--
 stubs/xen-hvm.c|  8 ++--
 7 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 0c0bc48137..0db670f6d7 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -203,9 +203,16 @@ static void pc_q35_init(MachineState *machine)
 for (i = 0; i < GSI_NUM_PINS; i++) {
 qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, pcms->gsi[i]);
 }
-pci_bus_irqs(host_bus, ich9_lpc_set_irq, ich9_lpc_map_irq, ich9_lpc,
- ICH9_LPC_NB_PIRQS);
-pci_bus_set_route_irq_fn(host_bus, ich9_route_intx_pin_to_irq);
+
+if (xen_enabled()) {
+pci_bus_irqs(host_bus, xen_cmn_set_irq, xen_cmn_pci_slot_get_pirq,
+ ich9_lpc, ICH9_XEN_NUM_IRQ_SOURCES);
+} else {
+pci_bus_irqs(host_bus, ich9_lpc_set_irq, ich9_lpc_map_irq, ich9_lpc,
+ ICH9_LPC_NB_PIRQS);
+pci_bus_set_route_irq_fn(host_bus, ich9_route_intx_pin_to_irq);
+}
+
 isa_bus = ich9_lpc->isa_bus;
 
 if (kvm_pic_in_kernel()) {
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index f24b7d4923..40a5c13fa6 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -13,6 +13,7 @@
 #include "cpu.h"
 #include "hw/pci/pci.h"
 #include "hw/i386/pc.h"
+#include "hw/i386/ich9.h"
 #include "hw/i386/apic-msidef.h"
 #include "hw/xen/xen_common.h"
 #include "hw/xen/xen_backend.h"
@@ -115,14 +116,14 @@ typedef struct XenIOState {
 Notifier wakeup;
 } XenIOState;
 
-/* Xen specific function for piix pci */
+/* Xen-specific functions for pci dev IRQ handling */
 
-int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
+int xen_cmn_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
 {
 return irq_num + ((pci_dev->devfn >> 3) << 2);
 }
 
-void xen_piix3_set_irq(void *opaque, int irq_num, int level)
+void xen_cmn_set_irq(void *opaque, int irq_num, int level)
 {
 xen_set_pci_intx_level(xen_domid, 0, 0, irq_num >> 2,
irq_num & 3, level);
@@ -145,6 +146,31 @@ void xen_piix_pci_write_config_client(uint32_t address, 
uint32_t val, int len)
 }
 }
 
+void xen_ich9_pci_write_config_client(uint32_t address, uint32_t val, int len)
+{
+static bool pirqe_f_warned = false;
+
+if (ranges_overlap(address, len, ICH9_LPC_PIRQA_ROUT, 4)) {
+/* handle PIRQA..PIRQD routing */
+xen_piix_pci_write_config_client(address, val, len);
+} else if (ranges_overlap(address, len, ICH9_LPC_PIRQE_

[Qemu-devel] [RFC PATCH 15/30] q35/acpi/xen: Provide ACPI PCI hotplug interface for Xen on Q35

2018-03-12 Thread Alexey Gerasimenko
This patch allows using ACPI PCI hotplug functionality for Xen on Q35.
All added code depends on xen_enabled(), so no functionality change for
non-Xen usage.

We need to call the acpi_set_pci_info function from ich9_pm_init as well,
so it was made globally visible again (as it was before).

Signed-off-by: Alexey Gerasimenko <x19...@gmail.com>
---
 hw/acpi/ich9.c  | 24 
 hw/acpi/pcihp.c |  2 +-
 include/hw/acpi/ich9.h  |  2 ++
 include/hw/acpi/pcihp.h |  2 ++
 4 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index c5d8646abc..62e2582e1a 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -37,6 +37,7 @@
 
 #include "hw/i386/ich9.h"
 #include "hw/mem/pc-dimm.h"
+#include "hw/xen/xen.h"
 
 //#define DEBUG
 
@@ -258,6 +259,10 @@ static void pm_reset(void *opaque)
 pm->smi_en_wmask = ~0;
 
 acpi_update_sci(>acpi_regs, pm->irq);
+
+if (xen_enabled()) {
+acpi_pcihp_reset(>acpi_pci_hotplug);
+}
 }
 
 static void pm_powerdown_req(Notifier *n, void *opaque)
@@ -300,6 +305,17 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm,
 pm->powerdown_notifier.notify = pm_powerdown_req;
 qemu_register_powerdown_notifier(>powerdown_notifier);
 
+if (xen_enabled()) {
+PCIBus *bus = pci_get_bus(lpc_pci);
+
+qbus_set_hotplug_handler(BUS(bus), DEVICE(lpc_pci), _abort);
+
+acpi_pcihp_init(OBJECT(lpc_pci), >acpi_pci_hotplug, bus,
+pci_address_space_io(lpc_pci), false);
+
+acpi_set_pci_info();
+}
+
 legacy_acpi_cpu_hotplug_init(pci_address_space_io(lpc_pci),
 OBJECT(lpc_pci), >gpe_cpu, ICH9_CPU_HOTPLUG_IO_BASE);
 
@@ -496,6 +512,10 @@ void ich9_pm_device_plug_cb(HotplugHandler *hotplug_dev, 
DeviceState *dev,
 acpi_memory_plug_cb(hotplug_dev, >pm.acpi_memory_hotplug,
 dev, errp);
 }
+} else if (xen_enabled() &&
+   object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+acpi_pcihp_device_plug_cb(hotplug_dev, >pm.acpi_pci_hotplug,
+  dev, errp);
 } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
 if (lpc->pm.cpu_hotplug_legacy) {
 legacy_acpi_cpu_plug_cb(hotplug_dev, >pm.gpe_cpu, dev, errp);
@@ -522,6 +542,10 @@ void ich9_pm_device_unplug_request_cb(HotplugHandler 
*hotplug_dev,
!lpc->pm.cpu_hotplug_legacy) {
 acpi_cpu_unplug_request_cb(hotplug_dev, >pm.cpuhp_state,
dev, errp);
+} else if (xen_enabled() &&
+   object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+acpi_pcihp_device_unplug_cb(hotplug_dev, >pm.acpi_pci_hotplug,
+dev, errp);
 } else {
 error_setg(errp, "acpi: device unplug request for not supported device"
" type: %s", object_get_typename(OBJECT(dev)));
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index f70d8620d7..d822f93293 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -94,7 +94,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque)
 return bsel_alloc;
 }
 
-static void acpi_set_pci_info(void)
+void acpi_set_pci_info(void)
 {
 static bool bsel_is_set;
 PCIBus *bus;
diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h
index 59aeb06393..4a47d93745 100644
--- a/include/hw/acpi/ich9.h
+++ b/include/hw/acpi/ich9.h
@@ -26,6 +26,7 @@
 #include "hw/acpi/cpu.h"
 #include "hw/acpi/memory_hotplug.h"
 #include "hw/acpi/acpi_dev_interface.h"
+#include "hw/acpi/pcihp.h"
 #include "hw/acpi/tco.h"
 
 typedef struct ICH9LPCPMRegs {
@@ -52,6 +53,7 @@ typedef struct ICH9LPCPMRegs {
 bool cpu_hotplug_legacy;
 AcpiCpuHotplug gpe_cpu;
 CPUHotplugState cpuhp_state;
+AcpiPciHpState acpi_pci_hotplug;
 
 MemHotplugState acpi_memory_hotplug;
 
diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h
index 8a65f99fc8..0a685dd228 100644
--- a/include/hw/acpi/pcihp.h
+++ b/include/hw/acpi/pcihp.h
@@ -64,6 +64,8 @@ void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, 
AcpiPciHpState *s,
 /* Called on reset */
 void acpi_pcihp_reset(AcpiPciHpState *s);
 
+void acpi_set_pci_info(void);
+
 extern const VMStateDescription vmstate_acpi_pcihp_pci_status;
 
 #define VMSTATE_PCI_HOTPLUG(pcihp, state, test_pcihp) \
-- 
2.11.0




[Qemu-devel] [RFC PATCH 16/30] q35/xen: Add Xen platform device support for Q35

2018-03-12 Thread Alexey Gerasimenko
Current Xen/QEMU method to control Xen Platform device on i440 is a bit
odd -- enabling/disabling Xen platform device actually modifies the QEMU
emulated machine type, namely xenfv <--> pc.

In order to avoid multiplying machine types, use a new way to control Xen
Platform device for QEMU -- "xen-platform-dev" machine property (bool).
To maintain backward compatibility with existing Xen/QEMU setups, this
is only applicable to q35 machine currently. i440 emulation still uses the
old method (i.e. xenfv/pc machine selection) to control Xen Platform
device, this may be changed later to xen-platform-dev property as well.

This way we can use a single machine type (q35) and change just
xen-platform-dev value to on/off to control Xen platform device.

Signed-off-by: Alexey Gerasimenko <x19...@gmail.com>
---
 hw/core/machine.c   | 21 +
 hw/i386/pc_q35.c| 14 ++
 include/hw/boards.h |  1 +
 qemu-options.hx |  1 +
 4 files changed, 37 insertions(+)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 5e2bbcdace..205e7da3ce 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -290,6 +290,20 @@ static void machine_set_igd_gfx_passthru(Object *obj, bool 
value, Error **errp)
 ms->igd_gfx_passthru = value;
 }
 
+static bool machine_get_xen_platform_dev(Object *obj, Error **errp)
+{
+MachineState *ms = MACHINE(obj);
+
+return ms->xen_platform_dev;
+}
+
+static void machine_set_xen_platform_dev(Object *obj, bool value, Error **errp)
+{
+MachineState *ms = MACHINE(obj);
+
+ms->xen_platform_dev = value;
+}
+
 static char *machine_get_firmware(Object *obj, Error **errp)
 {
 MachineState *ms = MACHINE(obj);
@@ -595,6 +609,13 @@ static void machine_class_init(ObjectClass *oc, void *data)
 object_class_property_set_description(oc, "igd-passthru",
 "Set on/off to enable/disable igd passthrou", _abort);
 
+object_class_property_add_bool(oc, "xen-platform-dev",
+machine_get_xen_platform_dev,
+machine_set_xen_platform_dev, _abort);
+object_class_property_set_description(oc, "xen-platform-dev",
+"Set on/off to enable/disable Xen Platform device",
+_abort);
+
 object_class_property_add_str(oc, "firmware",
 machine_get_firmware, machine_set_firmware,
 _abort);
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 0db670f6d7..62caf924cf 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -56,6 +56,18 @@
 /* ICH9 AHCI has 6 ports */
 #define MAX_SATA_PORTS 6
 
+static void q35_xen_hvm_init(MachineState *machine)
+{
+PCMachineState *pcms = PC_MACHINE(machine);
+
+if (xen_enabled()) {
+/* check if Xen Platform device is enabled */
+if (machine->xen_platform_dev) {
+pci_create_simple(pcms->bus, -1, "xen-platform");
+}
+}
+}
+
 /* PC hardware initialisation */
 static void pc_q35_init(MachineState *machine)
 {
@@ -207,6 +219,8 @@ static void pc_q35_init(MachineState *machine)
 if (xen_enabled()) {
 pci_bus_irqs(host_bus, xen_cmn_set_irq, xen_cmn_pci_slot_get_pirq,
  ich9_lpc, ICH9_XEN_NUM_IRQ_SOURCES);
+
+q35_xen_hvm_init(machine);
 } else {
 pci_bus_irqs(host_bus, ich9_lpc_set_irq, ich9_lpc_map_irq, ich9_lpc,
  ICH9_LPC_NB_PIRQS);
diff --git a/include/hw/boards.h b/include/hw/boards.h
index efb0a9edfd..f35fc1cc03 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -238,6 +238,7 @@ struct MachineState {
 bool usb;
 bool usb_disabled;
 bool igd_gfx_passthru;
+bool xen_platform_dev;
 char *firmware;
 bool iommu;
 bool suppress_vmdesc;
diff --git a/qemu-options.hx b/qemu-options.hx
index 6585058c6c..cee0b92028 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -38,6 +38,7 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
 "dump-guest-core=on|off include guest memory in a core 
dump (default=on)\n"
 "mem-merge=on|off controls memory merge support (default: 
on)\n"
 "igd-passthru=on|off controls IGD GFX passthrough support 
(default=off)\n"
+"xen-platform-dev=on|off controls Xen Platform device 
(default=off)\n"
 "aes-key-wrap=on|off controls support for AES key wrapping 
(default=on)\n"
 "dea-key-wrap=on|off controls support for DEA key wrapping 
(default=on)\n"
 "suppress-vmdesc=on|off disables self-describing migration 
(default=off)\n"
-- 
2.11.0




[Qemu-devel] [RFC PATCH 00/30] Xen Q35 Bringup patches + support for PCIe Extended Capabilities for passed through devices

2018-03-12 Thread Alexey Gerasimenko
 PCIe hotplug facilities
  (if there will be a benefit).
- For PCIe passthrough to work on Windows 7 and above, a specific
  workaround was implemented, which allows PCIe device passthrough to be
  used normally on those guest OSes. In the future, this should be changed
  to a new emulated PCI architecture for Xen -- providing support for simple
  PCI hierarchies, nested MMIO spaces, etc. Basically, we need at least
  to provide support for PCI-PCI bridges (PCIe Root Ports in our case).
  Currently Xen is limited to bus 0 in many places, even in hypercall
  parameters. A detailed description of the issue can be found in the patch
  named "xen/pt: Xen PCIe passthrough support for Q35: bypass PCIe topology
  check".
- VM migration was not tested, as the feature primarily targets PCIe
  passthrough, which isn't compatible with migration anyway.

How to use the Q35 feature:

A new domain config option was implemented: device_model_machine. It's
a string which has the following possible values:
- "i440" -- i440 emulation (default)
- "q35"  -- emulate a Q35 machine. By default, the storage interface is
  AHCI.

Note that omitting the device_model_machine parameter selects the i440
system by default, so the default behavior doesn't change for old domain
config files.

So, in order to enable Q35 emulation one needs to specify the following
option in the domain config file:
device_model_machine="q35"

It is recommended to install the guest OS from scratch to avoid issues due
to the emulated platform change.

One extra note - if you're going to backport this series to some older QEMU
version, make sure you have this patch for AHCI DMA bug applied: [1].
Otherwise you will encounter random Q35 guest hangups with a "Bad RAM
offset" message logged in /var/log/xen. Recent QEMU versions have this
patch committed already.

Also, a commit [2] is required to be applied (for xen-pt.c) -- it is
available in the upstream QEMU currently, but not present in qemu-xen.

This is my first (somewhat) large contribution to Xen, so some mistakes
are to be expected. Most testing was done using a previous version of the
patches and Xen 4.8.x.

I plan to support and extend this series further, for now I expect some
comments/suggestions/testing results/bugreports.

[1]: https://lists.xen.org/archives/html/xen-devel/2017-07/msg01077.html
[2]: https://lists.gnu.org/archive/html/qemu-devel/2017-12/msg03572.html

Xen changes:
Alexey Gerasimenko (12):
  libacpi: new DSDT ACPI table for Q35
  Makefile: build and use new DSDT table for Q35
  hvmloader: add function to query an emulated machine type (i440/Q35)
  hvmloader: add ACPI enabling for Q35
  hvmloader: add Q35 DSDT table loading
  hvmloader: add basic Q35 support
  hvmloader: allocate MMCONFIG area in the MMIO hole + minor code
refactoring
  libxl: Q35 support (new option device_model_machine)
  libxl: Xen Platform device support for Q35
  libacpi: build ACPI MCFG table if requested
  hvmloader: use libacpi to build MCFG table
  docs: provide description for device_model_machine option

 docs/man/xl.cfg.pod.5.in |  27 ++
 tools/firmware/hvmloader/Makefile|   2 +-
 tools/firmware/hvmloader/config.h|   5 +
 tools/firmware/hvmloader/hvmloader.c |  11 +-
 tools/firmware/hvmloader/pci.c   | 289 --
 tools/firmware/hvmloader/pci_regs.h  |   7 +
 tools/firmware/hvmloader/util.c  | 130 -
 tools/firmware/hvmloader/util.h  |  10 +
 tools/libacpi/Makefile   |   9 +-
 tools/libacpi/acpi2_0.h  |  21 ++
 tools/libacpi/build.c|  42 +++
 tools/libacpi/dsdt_q35.asl   | 551 +++
 tools/libacpi/libacpi.h  |   4 +
 tools/libxl/libxl_dm.c   |  20 +-
 tools/libxl/libxl_types.idl  |   7 +
 tools/xl/xl_parse.c  |  14 +
 16 files changed, 1051 insertions(+), 98 deletions(-)
 create mode 100644 tools/libacpi/dsdt_q35.asl

QEMU changes:
Alexey Gerasimenko (18):
  pc/xen: Xen Q35 support: provide IRQ handling for PCI devices
  pc/q35: Apply PCI bus BSEL property for Xen PCI device hotplug
  q35/acpi/xen: Provide ACPI PCI hotplug interface for Xen on Q35
  q35/xen: Add Xen platform device support for Q35
  q35: Fix incorrect values for PCIEXBAR masks
  xen/pt: XenHostPCIDevice: provide functions for PCI Capabilities and
PCIe Extended Capabilities enumeration
  xen/pt: avoid reading PCIe device type and cap version multiple times
  xen/pt: determine the legacy/PCIe mode for a passed through device
  xen/pt: Xen PCIe passthrough support for Q35: bypass PCIe topology
check
  xen/pt: add support for PCIe Extended Capabilities and larger config
space
  xen/pt: handle PCIe Extended Capabilities Next register
  xen/pt: allow to hide PCIe Extended Capabilities
  xen/pt: add Vendor-specific PCIe Extended Capability descriptor and
sizing
  xen/pt: add fixed-size PCIe Extended Capabilities descriptors
  xen/pt: add A