On Wed, Sep 15, 2010 at 02:38:18PM +0900, Isaku Yamahata wrote: > This patch implements helper functions for pci express capability > and pci express extended capability allocation. > NOTE: presence detection depends on pci_qdev_init() change. > > Signed-off-by: Isaku Yamahata <yamah...@valinux.co.jp> > > --- > Changes v2 -> v3: > - don't use 0b gcc extension. use 0x instead. > - split out constants into pcie_regs.h for linux merge. > - export some helpers for pcie-aer split. > - split out aer helper functions from pcie.c to pcie_aer.c > - embed PCIExpressDevice into PCIDevice. > --- > Makefile.objs | 1 + > hw/pci.h | 12 + > hw/pcie.c | 638 > +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/pcie.h | 96 +++++++++ > qemu-common.h | 1 + > 5 files changed, 748 insertions(+), 0 deletions(-) > create mode 100644 hw/pcie.c > create mode 100644 hw/pcie.h > > diff --git a/Makefile.objs b/Makefile.objs > index 5f5a4c5..eeb5134 100644 > --- a/Makefile.objs > +++ b/Makefile.objs > @@ -186,6 +186,7 @@ hw-obj-$(CONFIG_PIIX4) += piix4.o > # PCI watchdog devices > hw-obj-y += wdt_i6300esb.o > > +hw-obj-y += pcie.o > hw-obj-y += msix.o msi.o > > # PCI network cards > diff --git a/hw/pci.h b/hw/pci.h > index 630631b..19e85f5 100644 > --- a/hw/pci.h > +++ b/hw/pci.h > @@ -9,6 +9,8 @@ > /* PCI includes legacy ISA access. */ > #include "isa.h" > > +#include "pcie.h" > + > /* PCI bus */ > > #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) > @@ -175,6 +177,9 @@ struct PCIDevice { > /* Offset of MSI capability in config space */ > uint8_t msi_cap; > > + /* PCI Express */ > + PCIExpressDevice exp; > + > /* Location of option rom */ > char *romfile; > ram_addr_t rom_offset; > @@ -389,6 +394,13 @@ static inline uint32_t pci_config_size(const PCIDevice > *d) > return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : > PCI_CONFIG_SPACE_SIZE; > } > > +/* These are pci express specific, so should belong to pcie.h. > + they're here to avoid mutual header dependency. */ > +static inline uint8_t pci_pcie_cap(const PCIDevice *d) > +{ > + return pci_is_express(d) ? d->exp.exp_cap : 0; > +} > +
This one seems useless: 0 is not right for how you use this function. Just use the field directly? > /* These are not pci specific. Should move into a separate header. > * Only pci.c uses them, so keep them here for now. > */ > diff --git a/hw/pcie.c b/hw/pcie.c > new file mode 100644 > index 0000000..a6f396b > --- /dev/null > +++ b/hw/pcie.c > @@ -0,0 +1,638 @@ > +/* > + * pcie.c > + * > + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp> > + * VA Linux Systems Japan K.K. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License along > + * with this program; if not, see <http://www.gnu.org/licenses/>. > + */ > + > +#include "sysemu.h" > +#include "pci_bridge.h" > +#include "pcie.h" > +#include "msix.h" > +#include "msi.h" > +#include "pci_internals.h" > +#include "pcie_regs.h" > + > +//#define DEBUG_PCIE > +#ifdef DEBUG_PCIE > +# define PCIE_DPRINTF(fmt, ...) \ > + fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__) > +#else > +# define PCIE_DPRINTF(fmt, ...) do {} while (0) > +#endif > +#define PCIE_DEV_PRINTF(dev, fmt, ...) \ > + PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__) > + > +static inline const char *pcie_hp_event_name(enum PCIExpressHotPlugEvent > event) > +{ > + switch (event) { > + case PCI_EXP_HP_EV_ABP: > + return "attention button pushed"; > + case PCI_EXP_HP_EV_PDC: > + return "present detection changed"; > + case PCI_EXP_HP_EV_CCI: > + return "command completed"; > + default: > + break; > + } > + return "Unknown event"; > +} > + Nice, but so much code just for debug? print the code out inx hex ... > +/*************************************************************************** > + * pci express capability helper functions > + */ > +void pcie_notify(PCIDevice *dev, uint16_t vector, bool trigger, int level) Why is this not static? It makes sense for internal stuff possibly, but I think functions will need to know what to do: they can't treat msi/msix/irq identically anyway. The API seems confusing, I think this is what is creating code for you. Specifically level = 0 does not notify at all. So I think we need two: 1. pcie_assert_interrupt which sends msi or sets level to 1 2. pcie_deassert_interrupt which sets level to 0, or nothing for non msi. Then below you can e.g. if (!sltctrl) { pcie_deassert(...); return; } > +{ > + /* masking/masking interrupt is handled by upper layer. > + * i.e. msix_notify() for MSI-X > + * msi_notify() for MSI > + * pci_set_irq() for INTx > + */ > + PCIE_DEV_PRINTF(dev, "noitfy vector %d tirgger:%d level:%d\n", typo > + vector, trigger, level); > + if (msix_enabled(dev)) { > + if (trigger) { > + msix_notify(dev, vector); > + } > + } else if (msi_enabled(dev)) { > + if (trigger){ > + msi_notify(dev, vector); > + } > + } else { > + qemu_set_irq(dev->irq[0], level); always 0? really? This is INTA# - is this what the spec says? > + } > +} > + > +int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port) > +{ > + int exp_cap; > + uint8_t *pcie_cap; > + > + assert(pci_is_express(dev)); > + > + exp_cap = pci_add_capability(dev, PCI_CAP_ID_EXP, offset, > + PCI_EXP_VER2_SIZEOF); > + if (exp_cap < 0) { > + return exp_cap; > + } > + dev->exp.exp_cap = exp_cap; > + > + /* already done in pci_qdev_init() */ > + assert(dev->cap_present & QEMU_PCI_CAP_EXPRESS); Hmm. Why do we set this flag in qdev init but do the rest of it here? > + > + pcie_cap = dev->config + pci_pcie_cap(dev); come on, just use exp_cap. > + > + /* capability register > + interrupt message number defaults to 0 */ > + pci_set_word(pcie_cap + PCI_EXP_FLAGS, > + ((type << PCI_EXP_FLAGS_TYPE_SHIFT) & PCI_EXP_FLAGS_TYPE) | > + PCI_EXP_FLAGS_VER2); > + > + /* device capability register > + * table 7-12: > + * roll based error reporting bit must be set by all > + * Functions conforming to the ECN, PCI Express Base > + * Specification, Revision 1.1., or subsequent PCI Express Base > + * Specification revisions. > + */ > + pci_set_long(pcie_cap + PCI_EXP_DEVCAP, PCI_EXP_DEVCAP_RBER); > + > + pci_set_long(pcie_cap + PCI_EXP_LNKCAP, > + (port << PCI_EXP_LNKCAP_PN_SHIFT) | > + PCI_EXP_LNKCAP_ASPMS_0S | > + PCI_EXP_LNK_MLW_1 | > + PCI_EXP_LNK_LS_25); > + > + pci_set_word(pcie_cap + PCI_EXP_LNKSTA, > + PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25); > + > + pci_set_long(pcie_cap + PCI_EXP_DEVCAP2, > + PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP); > + > + pci_set_word(dev->wmask + exp_cap, PCI_EXP_DEVCTL2_EETLPPB); > + return exp_cap; > +} > + > +void pcie_cap_exit(PCIDevice *dev) > +{ > + pci_del_capability(dev, PCI_CAP_ID_EXP, PCI_EXP_VER2_SIZEOF); > +} > + > +uint8_t pcie_cap_get_type(const PCIDevice *dev) > +{ > + uint32_t pos = pci_pcie_cap(dev); > + assert(pos > 0); > + return (pci_get_word(dev->config + pos + PCI_EXP_FLAGS) & > + PCI_EXP_FLAGS_TYPE) >> PCI_EXP_FLAGS_TYPE_SHIFT; > +} > + > +/* MSI/MSI-X */ > +/* pci express interrupt message number */ > +void pcie_cap_flags_set_vector(PCIDevice *dev, uint8_t vector) > +{ > + uint8_t *pcie_cap = dev->config + pci_pcie_cap(dev); > + uint16_t tmp; > + > + assert(vector <= 32); > + tmp = pci_get_word(pcie_cap + PCI_EXP_FLAGS); > + tmp &= ~PCI_EXP_FLAGS_IRQ; > + tmp |= vector << PCI_EXP_FLAGS_IRQ_SHIFT; > + pci_set_word(pcie_cap + PCI_EXP_FLAGS, tmp); > +} > + > +uint8_t pcie_cap_flags_get_vector(PCIDevice *dev) > +{ > + return (pci_get_word(dev->config + pci_pcie_cap(dev) + PCI_EXP_FLAGS) & > + PCI_EXP_FLAGS_IRQ) >> PCI_EXP_FLAGS_IRQ_SHIFT; > +} > + > +void pcie_cap_deverr_init(PCIDevice *dev) > +{ > + uint32_t pos = pci_pcie_cap(dev); > + uint8_t *pcie_cap = dev->config + pos; > + uint8_t *pcie_wmask = dev->wmask + pos; > + uint8_t *pcie_w1cmask = dev->wmask + pos; > + > + pci_set_long(pcie_cap + PCI_EXP_DEVCAP, > + pci_get_long(pcie_cap + PCI_EXP_DEVCAP) | > + PCI_EXP_DEVCAP_RBER); > + > + pci_set_long(pcie_wmask + PCI_EXP_DEVCTL, > + pci_get_long(pcie_wmask + PCI_EXP_DEVCTL) | > + PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | > + PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE); > + > + pci_set_long(pcie_w1cmask + PCI_EXP_DEVSTA, > + pci_get_long(pcie_w1cmask + PCI_EXP_DEVSTA) | > + PCI_EXP_DEVSTA_CED | PCI_EXP_DEVSTA_NFED | > + PCI_EXP_DEVSTA_URD | PCI_EXP_DEVSTA_URD); > +} > + > +void pcie_cap_deverr_reset(PCIDevice *dev) > +{ > + uint8_t *pcie_cap = dev->config + pci_pcie_cap(dev); > + pci_set_long(pcie_cap + PCI_EXP_DEVCTL, > + pci_get_long(pcie_cap + PCI_EXP_DEVCTL) & > + ~(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | > + PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)); > +} > + > +/* > + * events: PCI_EXP_HP_EV_xxx > + * status: bit or of PCI_EXP_SLTSTA_xxx or of? also - make status the right enum then? Could you replace the comment above with one describing what this is supposed to do? Why can't users simply call pcie_assert_interrupt directly? The below comments are based on an incomplete understanding of the below function. > + */ > +static void pcie_cap_slot_event(PCIDevice *dev, > + enum PCIExpressHotPlugEvent events, > + uint16_t status) > +{ Most of the code seems to simply validate inputs. But why? You always pass in valid numbers also events -> event, you always pass a single one. It looks like it'll be simpler if you just assume a single event, and move the status bit tweaking outside of this function, it is only useful for PDS ... The we will get a straightforward > + bool trigger; > + int level; > + uint8_t *pcie_cap = dev->config + pci_pcie_cap(dev); > + uint16_t sltctl = pci_get_word(pcie_cap + PCI_EXP_SLTCTL); > + uint16_t sltsta = pci_get_word(pcie_cap + PCI_EXP_SLTSTA); > + > + PCIE_DEV_PRINTF(dev, > + "sltctl: 0x%0x2 sltsta: 0x%02x event:%x %s status:%d\n", > + sltctl, sltsta, > + events, pcie_hp_event_name(events), status); > + events &= PCI_EXP_HP_EV_SUPPORTED; > + if ((sltctl & PCI_EXP_SLTCTL_HPIE) && (sltctl & events) && > + ((sltsta ^ events) & events) /* 0 -> 1 */) { > + trigger = true; > + } else { > + trigger = false; > + } > + > + if (events & PCI_EXP_HP_EV_PDC) { > + sltsta &= ~PCI_EXP_SLTSTA_PDS; > + sltsta |= (status & PCI_EXP_SLTSTA_PDS); > + } > + sltsta |= events; > + pci_set_word(pcie_cap + PCI_EXP_SLTSTA, sltsta); > + PCIE_DEV_PRINTF(dev, "sltsta -> %02xn", sltsta); > + > + if ((sltctl & PCI_EXP_SLTCTL_HPIE) && (sltsta & > PCI_EXP_HP_EV_SUPPORTED)) { > + level = 1; > + } else { > + level = 0; > + } you can replace if with assignment here and elsewhere: level = (sltctl & PCI_EXP_SLTCTL_HPIE) && (sltsta & PCI_EXP_HP_EV_SUPPORTED); > + > + pcie_notify(dev, pcie_cap_flags_get_vector(dev), trigger, level); > +} > + > +static int pcie_cap_slot_hotplug(DeviceState *qdev, > + PCIDevice *pci_dev, int state) > +{ > + PCIDevice *d = DO_UPCAST(PCIDevice, qdev, qdev); > + uint8_t *pcie_cap = d->config + pci_pcie_cap(d); > + uint16_t sltsta = pci_get_word(pcie_cap + PCI_EXP_SLTSTA); > + > + if (!pci_dev->qdev.hotplugged) { > + assert(state); /* this case only happens machine creation. */ at machine creation? > + sltsta |= PCI_EXP_SLTSTA_PDS; > + pci_set_word(pcie_cap + PCI_EXP_SLTSTA, sltsta); > + return 0; > + } > + > + PCIE_DEV_PRINTF(pci_dev, "hotplug state: %d\n", state); > + if (sltsta & PCI_EXP_SLTSTA_EIS) { > + /* the slot is electromechanically locked. */ We'll need to produce some error here. > + return -EBUSY; > + } > + > + if (state) { > + if (PCI_FUNC(pci_dev->devfn) == 0) { > + /* event is per slot. Not per function > + * only generates event for function = 0. > + * When hot plug, populate functions > 0 > + * and then add function = 0 last. > + */ > + pcie_cap_slot_event(d, PCI_EXP_HP_EV_PDC, PCI_EXP_SLTSTA_PDS); > + } > + } else { > + PCIBridge *br; > + PCIBus *bus; > + DeviceState *next; > + if (PCI_FUNC(pci_dev->devfn) != 0) { > + /* event is per slot. Not per function. > + accepts function = 0 only. */ > + return -EINVAL; Can user or guest trigger this? If yes print an error. IF no, assert. > + } > + > + /* zap all functions. */ > + br = DO_UPCAST(PCIBridge, dev, d); > + bus = pci_bridge_get_sec_bus(br); > + QLIST_FOREACH_SAFE(qdev, &bus->qbus.children, sibling, next) { > + qdev_free(qdev); > + } > + > + pcie_cap_slot_event(d, PCI_EXP_HP_EV_PDC, 0); > + } > + return 0; > +} > + > +/* pci express slot for pci express root/downstream port > + PCI express capability slot registers */ > +void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot) > +{ > + uint8_t *pcie_cap = dev->config + pci_pcie_cap(dev); > + uint8_t *pcie_wmask = dev->wmask + pci_pcie_cap(dev); > + uint8_t *pcie_w1cmask = dev->w1cmask + pci_pcie_cap(dev); > + uint32_t tmp; > + > + pci_set_word(pcie_cap + PCI_EXP_FLAGS, > + pci_get_word(pcie_cap + PCI_EXP_FLAGS) | > PCI_EXP_FLAGS_SLOT); > + > + tmp = pci_get_long(pcie_cap + PCI_EXP_SLTCAP); > + tmp &= PCI_EXP_SLTCAP_PSN; > + tmp |= > + (slot << PCI_EXP_SLTCAP_PSN_SHIFT) | > + PCI_EXP_SLTCAP_EIP | > + PCI_EXP_SLTCAP_HPS | > + PCI_EXP_SLTCAP_HPC | > + PCI_EXP_SLTCAP_PIP | > + PCI_EXP_SLTCAP_AIP | > + PCI_EXP_SLTCAP_ABP; > + pci_set_long(pcie_cap + PCI_EXP_SLTCAP, tmp); > + > + tmp = pci_get_word(pcie_cap + PCI_EXP_SLTCTL); > + tmp &= ~(PCI_EXP_SLTCTL_PIC | PCI_EXP_SLTCTL_AIC); > + tmp |= PCI_EXP_SLTCTL_PIC_OFF | PCI_EXP_SLTCTL_AIC_OFF; > + pci_set_word(pcie_cap + PCI_EXP_SLTCTL, tmp); > + pci_set_word(pcie_wmask + PCI_EXP_SLTCTL, > + pci_get_word(pcie_wmask + PCI_EXP_SLTCTL) | > + PCI_EXP_SLTCTL_PIC | > + PCI_EXP_SLTCTL_AIC | > + PCI_EXP_SLTCTL_HPIE | > + PCI_EXP_SLTCTL_CCIE | > + PCI_EXP_SLTCTL_PDCE | > + PCI_EXP_SLTCTL_ABPE); > + > + pci_set_word(pcie_w1cmask + PCI_EXP_SLTSTA, > + pci_get_word(pcie_w1cmask + PCI_EXP_SLTSTA) | > + PCI_EXP_HP_EV_SUPPORTED); > + > + pci_bus_hotplug(pci_bridge_get_sec_bus(DO_UPCAST(PCIBridge, dev, dev)), > + pcie_cap_slot_hotplug, &dev->qdev); > +} > + > +void pcie_cap_slot_reset(PCIDevice *dev) > +{ > + uint8_t *pcie_cap = dev->config + pci_pcie_cap(dev); > + uint32_t tmp; > + > + PCIE_DEV_PRINTF(dev, "reset\n"); > + > + tmp = pci_get_word(pcie_cap + PCI_EXP_SLTCTL); > + tmp &= ~(PCI_EXP_SLTCTL_EIC | > + PCI_EXP_SLTCTL_PIC | > + PCI_EXP_SLTCTL_AIC | > + PCI_EXP_SLTCTL_HPIE | > + PCI_EXP_SLTCTL_CCIE | > + PCI_EXP_SLTCTL_PDCE | > + PCI_EXP_SLTCTL_ABPE); > + tmp |= PCI_EXP_SLTCTL_PIC_OFF | PCI_EXP_SLTCTL_AIC_OFF; > + pci_set_word(pcie_cap + PCI_EXP_SLTCTL, tmp); > + > + tmp = pci_get_word(pcie_cap + PCI_EXP_SLTSTA); > + tmp &= ~(PCI_EXP_SLTSTA_EIS | /* by reset, the lock is released */ > + PCI_EXP_SLTSTA_CC | > + PCI_EXP_SLTSTA_PDC | > + PCI_EXP_SLTSTA_ABP); > + pci_set_word(pcie_cap + PCI_EXP_SLTSTA, tmp); > +} > + > +void pcie_cap_slot_write_config(PCIDevice *dev, > + uint32_t addr, uint32_t val, int len, > + uint16_t sltctl_prev) > +{ > + uint32_t pos = pci_pcie_cap(dev); > + uint8_t *pcie_cap = dev->config + pos; > + uint16_t sltctl = pci_get_word(pcie_cap + PCI_EXP_SLTCTL); > + uint16_t sltsta = pci_get_word(pcie_cap + PCI_EXP_SLTSTA); > + > + PCIE_DEV_PRINTF(dev, > + "addr: 0x%x val: 0x%x len: %d\n" > + "\tsltctl_prev: 0x%02x sltctl: 0x%02x sltsta 0x%02x\n", > + addr, val, len, sltctl_prev, sltctl, sltsta); > + /* SLTSTA: process SLTSTA before SLTCTL to avoid spurious interrupt */ > + if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) { > + sltsta = pci_get_word(pcie_cap + PCI_EXP_SLTSTA); > + > + /* write to stlsta results in clearing bits, > + so new interrupts won't be generated. */ > + PCIE_DEV_PRINTF(dev, "sltsta -> 0x%02x\n", sltsta); > + } > + > + /* SLTCTL */ > + if (ranges_overlap(addr, len, pos + PCI_EXP_SLTCTL, 2)) { > + PCIE_DEV_PRINTF(dev, "sltctl: 0x%02x -> 0x%02x\n", > + sltctl_prev, sltctl); > + if (pci_shift_word(addr, val, pos + PCI_EXP_SLTCTL) & This is too complex. Just make the bit writeable, test and clear, then change status. > + PCI_EXP_SLTCTL_EIC) { > + /* toggle PCI_EXP_SLTSTA_EIS */ > + sltsta = (sltsta & ~PCI_EXP_SLTSTA_EIS) | > + ((sltsta ^ PCI_EXP_SLTSTA_EIS) & PCI_EXP_SLTSTA_EIS); You mean sltsta ^= PCI_EXP_SLTSTA_EIS ? > + pci_set_word(pcie_cap + PCI_EXP_SLTSTA, sltsta); > + PCIE_DEV_PRINTF(dev, "PCI_EXP_SLTCTL_EIC: sltsta -> 0x%02x\n", > + sltsta); > + } > + > + if (sltctl & PCI_EXP_SLTCTL_HPIE) { > + bool trigger; > + int level; > + > + if (((sltctl_prev ^ sltctl) & sltctl) & PCI_EXP_HP_EV_SUPPORTED) > { kill extra () they are not helpful: if ((sltctl_prev ^ sltctl) & sltctl & PCI_EXP_HP_EV_SUPPORTED) > + /* 0 -> 1 */ > + trigger = true; > + } else { > + trigger = false; > + } > + if ((sltctl & sltsta) & PCI_EXP_HP_EV_SUPPORTED) { kill extra () they are not helpful > + level = 1; > + } else { > + level = 0; > + } What is this trying to implement? > + pcie_notify(dev, pcie_cap_flags_get_vector(dev), trigger, level); > + } > + > + if (!((sltctl_prev ^ sltctl) & PCI_EXP_SLTCTL_SUPPORTED)) { > + PCIE_DEV_PRINTF(dev, > + "sprious command completion slctl 0x%x -> > 0x%x\n", > + sltctl_prev, sltctl); > + } > + > + /* command completion. > + * Real hardware might take a while to complete > + * requested command because physical movement would be involved > + * like locking the electromechanical lock. > + * However in our case, command is completed instantaneously above, > + * so send a command completion event right now. > + */ > + /* set command completed bit */ > + pcie_cap_slot_event(dev, PCI_EXP_HP_EV_CCI, 0); > + } > +} > + > +void pcie_cap_slot_push_attention_button(PCIDevice *dev) > +{ > + pcie_cap_slot_event(dev, PCI_EXP_HP_EV_ABP, 0); > +} > + > +/* root control/capabilities/status. PME isn't emulated for now */ > +void pcie_cap_root_init(PCIDevice *dev) > +{ > + uint8_t pos = pci_pcie_cap(dev); > + pci_set_word(dev->wmask + pos + PCI_EXP_RTCTL, > + PCI_EXP_RTCTL_SECEE | PCI_EXP_RTCTL_SENFEE | > + PCI_EXP_RTCTL_SEFEE); > +} > + > +void pcie_cap_root_reset(PCIDevice *dev) > +{ > + uint8_t *pcie_cap = dev->config + pci_pcie_cap(dev); > + pci_set_word(pcie_cap + PCI_EXP_RTCTL, 0); > +} > + > +/* function level reset(FLR) */ > +void pcie_cap_flr_init(PCIDevice *dev, pcie_flr_fn flr) > +{ > + uint8_t *pcie_cap = dev->config + pci_pcie_cap(dev); > + pci_set_word(pcie_cap + PCI_EXP_DEVCAP, > + pci_get_word(pcie_cap + PCI_EXP_DEVCAP) | > PCI_EXP_DEVCAP_FLR); > + dev->exp.flr = flr; > +} > + > +void pcie_cap_flr_write_config(PCIDevice *dev, > + uint32_t addr, uint32_t val, int len) > +{ > + uint32_t pos = pci_pcie_cap(dev); > + if (ranges_overlap(addr, len, pos + PCI_EXP_DEVCTL, 2)) { > + uint16_t val16 = pci_shift_word(addr, val, pos + PCI_EXP_DEVCTL); > + if ((val16 & PCI_EXP_DEVCTL_BCR_FLR) && dev->exp.flr) { > + dev->exp.flr(dev); > + } > + } Just make FLR writeable, and clear it after calling reset. This will also make it possible for devices to detect that they are reset because of FLR. > +} > + > +/* Alternative Routing-ID Interpretation (ARI) */ > +/* ari forwarding support for down stream port */ > +void pcie_cap_ari_init(PCIDevice *dev) > +{ > + uint8_t *pcie_cap = dev->config + pci_pcie_cap(dev); > + uint8_t *pcie_wmask = dev->wmask + pci_pcie_cap(dev); > + > + pci_set_long(pcie_cap + PCI_EXP_DEVCAP2, > + pci_get_long(pcie_cap + PCI_EXP_DEVCAP2) | > + PCI_EXP_DEVCAP2_ARI); > + > + pci_set_long(pcie_wmask + PCI_EXP_DEVCTL2, > + pci_get_long(pcie_wmask + PCI_EXP_DEVCTL2) | > + PCI_EXP_DEVCTL2_ARI); > +} > + > +void pcie_cap_ari_reset(PCIDevice *dev) > +{ > + uint8_t *pcie_cap = dev->config + pci_pcie_cap(dev); > + > + pci_set_long(pcie_cap + PCI_EXP_DEVCTL2, > + pci_get_long(pcie_cap + PCI_EXP_DEVCTL2) & > + ~PCI_EXP_DEVCTL2_ARI); > +} > + > +bool pcie_cap_is_ari_enabled(const PCIDevice *dev) > +{ > + if (!pci_is_express(dev)) { > + return false; > + } > + if (!pci_pcie_cap(dev)) { > + return false; > + } > + > + return pci_get_long(dev->config + pci_pcie_cap(dev) + PCI_EXP_DEVCTL2) & > + PCI_EXP_DEVCTL2_ARI; > +} > + > +/************************************************************************** > + * pci express extended capability allocation functions > + * uint16_t ext_cap_id (16 bit) > + * uint8_t cap_ver (4 bit) > + * uint16_t cap_offset (12 bit) > + * uint16_t ext_cap_size > + */ > + > +#define PCI_EXT_CAP_NO_ID ((uint16_t)0) /* 0 is reserved cap id. > + * use internally to find the > + * last capability in the > + * linked list > + */ better remove the macro and move the comment to where it's used. > + > +static uint16_t pcie_find_capability_list(PCIDevice *dev, uint16_t cap_id, > + uint16_t *prev_p) > +{ > + uint16_t prev = 0; > + uint16_t next = PCI_CONFIG_SPACE_SIZE; > + uint32_t header = pci_get_long(dev->config + next); next -> PCI_CONFIG_SPACE_SIZE in line above. > + > + if (!header) { > + /* no extended capability */ > + next = 0; > + goto out; > + } > + > + while (next) { will be clearer as a for loop? for (next = PCI_CONFIG_SPACE_SIZE; next; (prev = next),(next = PCI_EXT_CAP_NEXT(header))) > + assert(next >= PCI_CONFIG_SPACE_SIZE); > + assert(next <= PCIE_CONFIG_SPACE_SIZE - 8); > + > + header = pci_get_long(dev->config + next); > + if (PCI_EXT_CAP_ID(header) == cap_id) { > + break; > + } > + prev = next; > + next = PCI_EXT_CAP_NEXT(header); > + } > + > +out: > + if (prev_p) { > + *prev_p = prev; > + } > + return next; > +} > + > +uint16_t pcie_find_capability(PCIDevice *dev, uint16_t cap_id) > +{ > + return pcie_find_capability_list(dev, cap_id, NULL); > +} > + > +static void pcie_ext_cap_set_next(PCIDevice *dev, uint16_t pos, uint16_t > next) > +{ > + uint16_t header = pci_get_long(dev->config + pos); > + assert(!(next & (PCI_EXT_CAP_ALIGN - 1))); > + header = (header & ~PCI_EXT_CAP_NEXT_MASK) | > + ((next << PCI_EXT_CAP_NEXT_SHIFT) & PCI_EXT_CAP_NEXT_MASK); > + pci_set_long(dev->config + pos, header); > +} > + > +/* > + * caller must supply valid (offset, size) * such that the range shouldn't > + * overlap with other capability or other registers. > + * This function doesn't check it. > + */ > +void pcie_add_capability(PCIDevice *dev, > + uint16_t cap_id, uint8_t cap_ver, > + uint16_t offset, uint16_t size) > +{ > + uint32_t header; > + uint16_t next; > + > + assert(offset >= PCI_CONFIG_SPACE_SIZE); > + assert(offset < offset + size); > + assert(offset + size < PCIE_CONFIG_SPACE_SIZE); > + assert(size >= 8); > + assert(pci_is_express(dev)); > + > + if (offset == PCI_CONFIG_SPACE_SIZE) { > + header = pci_get_long(dev->config + offset); > + next = PCI_EXT_CAP_NEXT(header); > + } else { > + uint16_t prev; > + next = pcie_find_capability_list(dev, PCI_EXT_CAP_NO_ID, &prev); > + assert(next == 0); > + pcie_ext_cap_set_next(dev, prev, offset); > + } > + pci_set_long(dev->config + offset, PCI_EXT_CAP(cap_id, cap_ver, next)); > + > + /* Make capability read-only by default */ > + memset(dev->wmask + offset, 0, size); > + /* Check capability by default */ > + memset(dev->cmask + offset, 0xFF, size); > +} > + > +void pcie_del_capability(PCIDevice *dev, uint16_t cap_id, uint16_t size) > +{ > + uint16_t prev; > + uint16_t offset = pcie_find_capability_list(dev, cap_id, &prev); > + uint32_t header; > + > + assert(offset >= PCI_CONFIG_SPACE_SIZE); Should be assert(offset). This is what we return on error, right? > + header = pci_get_long(dev->config + offset); > + if (prev) { > + pcie_ext_cap_set_next(dev, prev, PCI_EXT_CAP_NEXT(header)); > + } else { > + /* move up next ext cap to PCI_CONFIG_SPACE_SIZE? */ Since we don't now, add assert that next is 0. > + assert(offset == PCI_CONFIG_SPACE_SIZE); > + pci_set_long(dev->config + offset, > + PCI_EXT_CAP(0, 0, PCI_EXT_CAP_NEXT(header))); > + } > + > + /* Make those registers read-only reserved zero */ So you make them readonly in both add and delete? delete should revert add: let's put the masks back the way they were: writeable. > + memset(dev->config + offset, 0, size); > + memset(dev->wmask + offset, 0, size); > + /* Clear cmask as device-specific registers can't be checked */ > + memset(dev->cmask + offset, 0, size); > +} > + > +/************************************************************************** > + * pci express extended capability helper functions > + */ > + > +/* ARI */ > +void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn) > +{ > + pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER, > + offset, PCI_ARI_SIZEOF); > + pci_set_long(dev->config + offset + PCI_ARI_CAP, > PCI_ARI_CAP_NFN(nextfn)); > +} > diff --git a/hw/pcie.h b/hw/pcie.h > new file mode 100644 > index 0000000..37713dc > --- /dev/null > +++ b/hw/pcie.h > @@ -0,0 +1,96 @@ > +/* > + * pcie.h > + * > + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp> > + * VA Linux Systems Japan K.K. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License along > + * with this program; if not, see <http://www.gnu.org/licenses/>. > + */ > + > +#ifndef QEMU_PCIE_H > +#define QEMU_PCIE_H > + > +#include "hw.h" > +#include "pcie_regs.h" > + > +enum PCIExpressIndicator { > + /* for attention and power indicator */ > + PCI_EXP_HP_IND_RESERVED = PCI_EXP_SLTCTL_IND_RESERVED, > + PCI_EXP_HP_IND_ON = PCI_EXP_SLTCTL_IND_ON, > + PCI_EXP_HP_IND_BLINK = PCI_EXP_SLTCTL_IND_BLINK, > + PCI_EXP_HP_IND_OFF = PCI_EXP_SLTCTL_IND_OFF, > +}; > + > +enum PCIExpressHotPlugEvent { this is not how we name types, I think? Either typedef enum {} PCIExpressHotPlugEvent, or enum pcie_hot_plug_event { }. Same for other types. > + /* the bits match the bits in Slot Control/Status registers. > + * PCI_EXP_HP_EV_xxx = PCI_EXP_SLTCTL_xxxE = PCI_EXP_SLTSTA_xxx Is it important that they match? We don't assume this in code, do we? > + */ > + PCI_EXP_HP_EV_ABP = 0x01, /* attention button preseed > */ typo > + PCI_EXP_HP_EV_PDC = 0x08, /* presence detect changed */ > + PCI_EXP_HP_EV_CCI = 0x10, /* command completed */ > + > + PCI_EXP_HP_EV_SUPPORTED = 0x19, /* supported event mask */ Gave me pause until I saw PCI_EXP_HP_EV_SUPPORTED = (PCI_EXP_HP_EV_ABP | PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_CCI) so make this explicit? Also - non supported bits would always be readonly, right? So why do we need this and all the masking? I think we should be able to get away with checking the whole register is not 0, and get rid of this? > + /* events not listed aren't supported */ > +}; > + > +typedef void (*pcie_flr_fn)(PCIDevice *dev); Is flr special? Can't we use the generic reset handlers? If not why? > + > +struct PCIExpressDevice { > + /* Offset of express capability in config space */ > + uint8_t exp_cap; > + > + /* FLR */ > + pcie_flr_fn flr; > +}; > + > +void pcie_notify(PCIDevice *dev, uint16_t vector, bool trigger, int level); > + > +/* PCI express capability helper functions */ > +int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t > port); > +void pcie_cap_exit(PCIDevice *dev); > +uint8_t pcie_cap_get_type(const PCIDevice *dev); > +void pcie_cap_flags_set_vector(PCIDevice *dev, uint8_t vector); > +uint8_t pcie_cap_flags_get_vector(PCIDevice *dev); > + > +void pcie_cap_deverr_init(PCIDevice *dev); > +void pcie_cap_deverr_reset(PCIDevice *dev); > + > +void pcie_cap_slot_init(PCIDevice *dev, uint16_t slot); > +void pcie_cap_slot_reset(PCIDevice *dev); > +void pcie_cap_slot_write_config(PCIDevice *dev, > + uint32_t addr, uint32_t val, int len, > + uint16_t sltctl_prev); > +void pcie_cap_slot_push_attention_button(PCIDevice *dev); > + > +void pcie_cap_root_init(PCIDevice *dev); > +void pcie_cap_root_reset(PCIDevice *dev); > + > +void pcie_cap_flr_init(PCIDevice *dev, pcie_flr_fn flr); > +void pcie_cap_flr_write_config(PCIDevice *dev, > + uint32_t addr, uint32_t val, int len); > + > +void pcie_cap_ari_init(PCIDevice *dev); > +void pcie_cap_ari_reset(PCIDevice *dev); > +bool pcie_cap_is_ari_enabled(const PCIDevice *dev); > + > +/* PCI express extended capability helper functions */ > +uint16_t pcie_find_capability(PCIDevice *dev, uint16_t cap_id); > +void pcie_add_capability(PCIDevice *dev, > + uint16_t cap_id, uint8_t cap_ver, > + uint16_t offset, uint16_t size); > +void pcie_del_capability(PCIDevice *dev, uint16_t cap_id, uint16_t size); > + > +void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn); > + > +#endif /* QEMU_PCIE_H */ > diff --git a/qemu-common.h b/qemu-common.h > index d735235..6d9ee26 100644 > --- a/qemu-common.h > +++ b/qemu-common.h > @@ -219,6 +219,7 @@ typedef struct PCIHostState PCIHostState; > typedef struct PCIExpressHost PCIExpressHost; > typedef struct PCIBus PCIBus; > typedef struct PCIDevice PCIDevice; > +typedef struct PCIExpressDevice PCIExpressDevice; > typedef struct PCIBridge PCIBridge; > typedef struct SerialState SerialState; > typedef struct IRQState *qemu_irq; > -- > 1.7.1.1