On Wed, Sep 08, 2010 at 04:39:34PM +0900, Isaku Yamahata wrote: > implemented msi support functions. > > Signed-off-by: Isaku Yamahata <yamah...@valinux.co.jp>
Good stuff. Some minor corrections mostly to the comments. It's up to you whether to address or ignore them. > --- > Changes v1 -> v2: > - opencode some oneline helper function/macros for readability > - use ffs where appropriate > - rename some functions/variables as suggested. > - added assert() > - 1 -> 1U > - clear INTx# when MSI is enabled > - clear pending bits for freed vectors. > - check the requested number of vectors. > --- > Makefile.objs | 2 +- > hw/msi.c | 360 > +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/msi.h | 41 +++++++ > hw/pci.h | 10 +- > 4 files changed, 409 insertions(+), 4 deletions(-) > create mode 100644 hw/msi.c > create mode 100644 hw/msi.h > > diff --git a/Makefile.objs b/Makefile.objs > index 594894b..5f5a4c5 100644 > --- a/Makefile.objs > +++ b/Makefile.objs > @@ -186,7 +186,7 @@ hw-obj-$(CONFIG_PIIX4) += piix4.o > # PCI watchdog devices > hw-obj-y += wdt_i6300esb.o > > -hw-obj-y += msix.o > +hw-obj-y += msix.o msi.o > > # PCI network cards > hw-obj-y += ne2000.o > diff --git a/hw/msi.c b/hw/msi.c > new file mode 100644 > index 0000000..32dc953 > --- /dev/null > +++ b/hw/msi.c > @@ -0,0 +1,360 @@ > +/* > + * msi.c > + * > + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp> > + * VA Linux Systems Japan K.K. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + > + * You should have received a copy of the GNU General Public License along > + * with this program; if not, see <http://www.gnu.org/licenses/>. > + */ > + > +#include "msi.h" > + > +/* Eventually those constants should go to Linux pci_regs.h */ > +#define PCI_MSI_PENDING_32 0x10 > +#define PCI_MSI_PENDING_64 0x14 > + > +/* PCI_MSI_ADDRESS_LO */ > +#define PCI_MSI_ADDRESS_LO_MASK (~0x3) > + > +/* If we get rid of cap allocator, we won't need those. */ > +#define PCI_MSI_32_SIZEOF 0x0a > +#define PCI_MSI_64_SIZEOF 0x0e > +#define PCI_MSI_32M_SIZEOF 0x14 > +#define PCI_MSI_64M_SIZEOF 0x18 > + > +/* If we get rid of cap allocator, we won't need this. */ > +static inline uint8_t msi_cap_sizeof(uint16_t flags) > +{ > + switch (flags & (PCI_MSI_FLAGS_MASKBIT | PCI_MSI_FLAGS_64BIT)) { > + case PCI_MSI_FLAGS_MASKBIT | PCI_MSI_FLAGS_64BIT: > + return PCI_MSI_64M_SIZEOF; > + case PCI_MSI_FLAGS_64BIT: > + return PCI_MSI_64_SIZEOF; > + case PCI_MSI_FLAGS_MASKBIT: > + return PCI_MSI_32M_SIZEOF; > + case 0: > + return PCI_MSI_32_SIZEOF; > + default: > + abort(); > + break; > + } > + return 0; > +} > + > +//#define MSI_DEBUG > + > +#ifdef MSI_DEBUG > +# define MSI_DPRINTF(fmt, ...) \ > + fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__) > +#else > +# define MSI_DPRINTF(fmt, ...) do { } while (0) > +#endif > +#define MSI_DEV_PRINTF(dev, fmt, ...) \ > + MSI_DPRINTF("%s:%x " fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__) > + > +static inline uint8_t msi_nr_vectors(uint16_t flags) > +{ > + return 1U << > + ((flags & PCI_MSI_FLAGS_QSIZE) >> (ffs(PCI_MSI_FLAGS_QSIZE) - 1)); > +} > + > +static inline uint8_t msi_flags_off(const PCIDevice* dev) > +{ > + return dev->msi_cap + PCI_MSI_FLAGS; > +} > + > +static inline uint8_t msi_address_lo_off(const PCIDevice* dev) > +{ > + return dev->msi_cap + PCI_MSI_ADDRESS_LO; > +} > + > +static inline uint8_t msi_address_hi_off(const PCIDevice* dev) > +{ > + return dev->msi_cap + PCI_MSI_ADDRESS_HI; > +} > + > +static inline uint8_t msi_data_off(const PCIDevice* dev, bool msi64bit) > +{ > + return dev->msi_cap + (msi64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32); > +} > + > +static inline uint8_t msi_mask_off(const PCIDevice* dev, bool msi64bit) > +{ > + return dev->msi_cap + (msi64bit ? PCI_MSI_MASK_64 : PCI_MSI_MASK_32); > +} > + > +static inline uint8_t msi_pending_off(const PCIDevice* dev, bool msi64bit) > +{ > + return dev->msi_cap + (msi64bit ? PCI_MSI_PENDING_64 : > PCI_MSI_PENDING_32); > +} > + > +bool msi_enabled(const PCIDevice *dev) > +{ > + return msi_present(dev) && > + (pci_get_word(dev->config + msi_flags_off(dev)) & > + PCI_MSI_FLAGS_ENABLE); > +} > + > +int msi_init(struct PCIDevice *dev, uint8_t offset, > + uint8_t nr_vectors, bool msi64bit, bool msi_per_vector_mask) > +{ > + uint8_t vectors_order; > + uint16_t flags; > + uint8_t cap_size; > + int config_offset; > + MSI_DEV_PRINTF(dev, > + "init offset: 0x%"PRIx8" vector: %"PRId8 > + " 64bit %d mask %d\n", > + offset, nr_vectors, msi64bit, msi_per_vector_mask); > + > + assert(!(nr_vectors & (nr_vectors - 1))); /* power of 2 */ > + assert(nr_vectors > 0); > + assert(nr_vectors <= 32); /* the nr of MSI vectors is up to 32 */ > + vectors_order = ffs(nr_vectors) - 1; > + > + flags = (vectors_order << (ffs(PCI_MSI_FLAGS_QMASK) - 1)) & > + PCI_MSI_FLAGS_QMASK; The & PCI_MSI_FLAGS_QMASK is not needed, is it? > + if (msi64bit) { > + flags |= PCI_MSI_FLAGS_64BIT; > + } > + if (msi_per_vector_mask) { > + flags |= PCI_MSI_FLAGS_MASKBIT; > + } > + > + cap_size = msi_cap_sizeof(flags); > + config_offset = pci_add_capability(dev, PCI_CAP_ID_MSI, offset, > cap_size); > + if (config_offset < 0) { > + return config_offset; > + } > + > + dev->msi_cap = config_offset; > + dev->cap_present |= QEMU_PCI_CAP_MSI; > + > + pci_set_word(dev->config + msi_flags_off(dev), flags); > + pci_set_word(dev->wmask + msi_flags_off(dev), > + PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE); > + pci_set_long(dev->wmask + msi_address_lo_off(dev), > + PCI_MSI_ADDRESS_LO_MASK); > + if (msi64bit) { > + pci_set_long(dev->wmask + msi_address_hi_off(dev), 0xffffffff); > + } > + pci_set_word(dev->wmask + msi_data_off(dev, msi64bit), 0xffff); > + > + if (msi_per_vector_mask) { > + pci_set_long(dev->wmask + msi_mask_off(dev, msi64bit), > + (1U << nr_vectors) - 1); > + } > + return config_offset; > +} > + > +void msi_uninit(struct PCIDevice *dev) > +{ > + uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); > + uint8_t cap_size = msi_cap_sizeof(flags); > + pci_del_capability(dev, PCI_CAP_ID_MSIX, cap_size); > + MSI_DEV_PRINTF(dev, "uninit\n"); > +} > + > +void msi_reset(PCIDevice *dev) > +{ > + uint16_t flags; > + bool msi64bit; > + > + flags = pci_get_word(dev->config + msi_flags_off(dev)); > + flags &= ~(PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE); > + msi64bit = flags & PCI_MSI_FLAGS_64BIT; > + > + pci_set_word(dev->config + msi_flags_off(dev), flags); > + pci_set_long(dev->config + msi_address_lo_off(dev), 0); > + if (msi64bit) { > + pci_set_long(dev->config + msi_address_hi_off(dev), 0); > + } > + pci_set_word(dev->config + msi_data_off(dev, msi64bit), 0); > + if (flags & PCI_MSI_FLAGS_MASKBIT) { > + pci_set_long(dev->config + msi_mask_off(dev, msi64bit), 0); > + pci_set_long(dev->config + msi_pending_off(dev, msi64bit), 0); > + } > + MSI_DEV_PRINTF(dev, "reset\n"); > +} > + > +static bool msi_is_masked(const PCIDevice *dev, uint8_t vector) > +{ > + uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); > + uint32_t mask; > + > + if (!(flags & PCI_MSI_FLAGS_MASKBIT)) { > + return false; > + } > + > + mask = pci_get_long(dev->config + > + msi_mask_off(dev, flags & PCI_MSI_FLAGS_64BIT)); > + return mask & (1U << vector); > +} > + > +static void msi_set_pending(PCIDevice *dev, uint8_t vector) > +{ > + uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); > + bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; > + uint32_t pending; > + > + assert(flags & PCI_MSI_FLAGS_MASKBIT); > + > + pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit)); > + pending |= 1U << vector; > + pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending); > + MSI_DEV_PRINTF(dev, "pending vector 0x%"PRIx8"\n", vector); > +} > + > +void msi_notify(PCIDevice *dev, uint8_t vector) > +{ > + uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); > + bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; > + uint8_t nr_vectors = msi_nr_vectors(flags); > + uint64_t address; > + uint32_t data; > + > + assert(vector < nr_vectors); > + if (msi_is_masked(dev, vector)) { > + msi_set_pending(dev, vector); > + return; > + } > + > + if (msi64bit){ > + address = pci_get_quad(dev->config + msi_address_lo_off(dev)); > + } else { > + address = pci_get_long(dev->config + msi_address_lo_off(dev)); > + } > + > + /* upper bit 31:16 is zero */ > + data = pci_get_word(dev->config + msi_data_off(dev, msi64bit)); > + if (nr_vectors > 1) { > + data &= ~(nr_vectors - 1); > + data |= vector; > + } > + > + MSI_DEV_PRINTF(dev, > + "notify vector 0x%"PRIx8 > + " address: 0x%"PRIx64" data: 0x%"PRIx32"\n", > + vector, address, data); > + stl_phys(address, data); > +} > + > +/* call this function after updating configs by pci_default_write_config(). > */ > +void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len) > +{ > + uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); > + bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; > + bool msi_per_vector_mask = flags & PCI_MSI_FLAGS_MASKBIT; > + uint8_t nr_vectors; > + uint8_t nr_vectors_capable; > + uint8_t vector; > + uint32_t pending; > + int i; > + > +#ifdef MSI_DEBUG > + if (ranges_overlap(addr, len, dev->msi_cap, msi_cap_sizeof(flags))) { > + MSI_DEV_PRINTF(dev, "addr 0x%"PRIx32" val 0x%"PRIx32" len %d\n", > + addr, val, len); > + MSI_DEV_PRINTF(dev, "ctrl: 0x%"PRIx16" address: 0x%"PRIx32, > + flags, > + pci_get_long(dev->config + msi_address_lo_off(dev))); > + if (msi64bit) { > + fprintf(stderr, " addrss-hi: 0x%"PRIx32, > + pci_get_long(dev->config + msi_address_hi_off(dev))); > + } > + fprintf(stderr, " data: 0x%"PRIx16, > + pci_get_word(dev->config + msi_data_off(dev, msi64bit))); > + if (flags & PCI_MSI_FLAGS_MASKBIT) { > + fprintf(stderr, " mask 0x%"PRIx32" pending 0x%"PRIx32, > + pci_get_long(dev->config + msi_mask_off(dev, msi64bit)), > + pci_get_long(dev->config + msi_pending_off(dev, > msi64bit))); > + } > + fprintf(stderr, "\n"); > + } > +#endif > + > + /* do we modified? */ english: do -> Are > + if (!(ranges_overlap(addr, len, msi_flags_off(dev), 2) || > + (msi_per_vector_mask && > + ranges_overlap(addr, len, msi_mask_off(dev, msi64bit), 4)))) { > + return; > + } > + > + if (!(flags & PCI_MSI_FLAGS_ENABLE)) { > + return; > + } > + > + /* > + * Now MSI is enabled, clear INTx# interrupts. > + * Guest is prohibited from enabling msi while interrupt is asserted. > + * But it can. I think the above wording is misleading: the spec says that the *driver* is prohibited from writing enable bit to mask a service request. But the guest OS could do this. >+ * So we just discard the interrupts as moderate fallback. > + * > + * 6.8.3.3. Enabling Operation > + * While enabled for MSI or MSI-X operation, a function is prohibited > + * from using its INTx# pin (if implemented) to request > + * service (MSI, MSI-X, and INTx# are mutually exclusive). > + */ > + for (i = 0; i < PCI_NUM_PINS; ++i) { > + qemu_set_irq(dev->irq[i], 0); > + } > + > + /* nr_vectors might be set bigger than capable. So clamp it. */ Maybe add that this is not legal by spec, so we can do anything we like, just don't crash the host. > + nr_vectors = msi_nr_vectors(flags); > + nr_vectors_capable = 1U << > + ((flags & PCI_MSI_FLAGS_QMASK) >> (ffs(PCI_MSI_FLAGS_QMASK) - 1)); > + if (nr_vectors > nr_vectors_capable) { > + uint8_t vectors_order; > + nr_vectors = nr_vectors_capable; > + vectors_order = ffs(nr_vectors) - 1; > + > + flags &= ~PCI_MSI_FLAGS_QSIZE; > + flags |= (vectors_order << (ffs(PCI_MSI_FLAGS_QSIZE) - 1)) & > + PCI_MSI_FLAGS_QSIZE; > + pci_set_word(dev->config + msi_flags_off(dev), flags); > + } We do the shift/ffs dance many times here. This can be done easier if we compare the log values: log_num_vecs = (flags & PCI_MSI_FLAGS_QSIZE) >> (ffs(PCI_MSI_FLAGS_QSIZE) - 1); log_max_vecs = (flags & PCI_MSI_FLAGS_QMASK) >> (ffs(PCI_MSI_FLAGS_QMASK) - 1); if (log_num_vecs > log_max_vecs) { flags &= ~PCI_MSI_FLAGS_QSIZE; flags |= log_max_vecs << (ffs(PCI_MSI_FLAGS_QSIZE) - 1); pci_set_word(dev->config + msi_flags_off(dev), flags); } > + > + if (!msi_per_vector_mask) { > + /* if per vector masking isn't supported, > + there is no pending interrupt. */ > + return; > + } > + > + /* > + * If the number of vectors are reduced, clear pending bit for > + * freed vectors. > + * Since it is unknown here which interrupt will be used for pending bit > + * of freed vector, just discard the interrupt. The last two lines seem to confuse more than they explain. Replace with 'This will discard pending interrupts, if any.'? > + */ > + pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit)); > + pending &= (1U << pending) - 1; > + pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending); > + > + /* deliver pending interrupts which are unmasked */ > + for (vector = 0; vector < nr_vectors; ++vector) { > + if (msi_is_masked(dev, vector) || !(pending & (1U << vector))) { > + continue; > + } > + > + pending &= ~(1U << vector); > + pci_set_long(dev->config + msi_pending_off(dev, msi64bit), > + pending); > + msi_notify(dev, vector); > + } > +} > + > +uint8_t msi_nr_vectors_allocated(const PCIDevice *dev) > +{ > + uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); > + return msi_nr_vectors(flags); > +} > diff --git a/hw/msi.h b/hw/msi.h > new file mode 100644 > index 0000000..eac9c78 > --- /dev/null > +++ b/hw/msi.h > @@ -0,0 +1,41 @@ > +/* > + * msi.h > + * > + * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp> > + * VA Linux Systems Japan K.K. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + > + * You should have received a copy of the GNU General Public License along > + * with this program; if not, see <http://www.gnu.org/licenses/>. > + */ > + > +#ifndef QEMU_MSI_H > +#define QEMU_MSI_H > + > +#include "qemu-common.h" > +#include "pci.h" > + > +bool msi_enabled(const PCIDevice *dev); > +int msi_init(struct PCIDevice *dev, uint8_t offset, > + uint8_t nr_vectors, bool msi64bit, bool msi_per_vector_mask); > +void msi_uninit(struct PCIDevice *dev); > +void msi_reset(PCIDevice *dev); > +void msi_notify(PCIDevice *dev, uint8_t vector); > +void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len); > +uint8_t msi_nr_vectors_allocated(const PCIDevice *dev); > + > +static inline bool msi_present(const PCIDevice *dev) > +{ > + return dev->cap_present & QEMU_PCI_CAP_MSI; > +} > + > +#endif /* QEMU_MSI_H */ > diff --git a/hw/pci.h b/hw/pci.h > index 2b4c318..296c7ba 100644 > --- a/hw/pci.h > +++ b/hw/pci.h > @@ -109,11 +109,12 @@ typedef struct PCIIORegion { > > /* Bits in cap_present field. */ > enum { > - QEMU_PCI_CAP_MSIX = 0x1, > - QEMU_PCI_CAP_EXPRESS = 0x2, > + QEMU_PCI_CAP_MSI = 0x1, > + QEMU_PCI_CAP_MSIX = 0x2, > + QEMU_PCI_CAP_EXPRESS = 0x4, > > /* multifunction capable device */ > -#define QEMU_PCI_CAP_MULTIFUNCTION_BITNR 2 > +#define QEMU_PCI_CAP_MULTIFUNCTION_BITNR 3 > QEMU_PCI_CAP_MULTIFUNCTION = (1 << QEMU_PCI_CAP_MULTIFUNCTION_BITNR), > }; > > @@ -168,6 +169,9 @@ struct PCIDevice { > /* Version id needed for VMState */ > int32_t version_id; > > + /* Offset of MSI capability in config space */ > + uint8_t msi_cap; > + > /* Location of option rom */ > char *romfile; > ram_addr_t rom_offset; > -- > 1.7.1.1