Re: [PATCH 3/6 v3] PCI: support ARI capability
* Zhao, Yu [EMAIL PROTECTED]: Add Alternative Routing-ID Interpretation (ARI) support. Cc: Jesse Barnes [EMAIL PROTECTED] Cc: Randy Dunlap [EMAIL PROTECTED] Cc: Grant Grundler [EMAIL PROTECTED] Cc: Alex Chiang [EMAIL PROTECTED] Cc: Matthew Wilcox [EMAIL PROTECTED] Cc: Roland Dreier [EMAIL PROTECTED] Cc: Greg KH [EMAIL PROTECTED] Signed-off-by: Yu Zhao [EMAIL PROTECTED] --- drivers/pci/pci.c| 31 +++ drivers/pci/pci.h| 12 drivers/pci/probe.c |3 +++ include/linux/pci.h |1 + include/linux/pci_regs.h | 14 ++ 5 files changed, 61 insertions(+), 0 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 400d3b3..fe9efc4 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1260,6 +1260,37 @@ void pci_pm_init(struct pci_dev *dev) } } +/** + * pci_ari_init - turn on ARI forwarding if it's supported + * @dev: the PCI device + */ +void pci_ari_init(struct pci_dev *dev) +{ + int pos; + u32 cap; + u16 ctrl; + + if (!dev-is_pcie || (dev-pcie_type != PCI_EXP_TYPE_ROOT_PORT + dev-pcie_type != PCI_EXP_TYPE_DOWNSTREAM)) + return; + + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (!pos) + return; + + pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, cap); + + if (!(cap PCI_EXP_DEVCAP2_ARI)) + return; + + pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); + ctrl |= PCI_EXP_DEVCTL2_ARI; + pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl); + + dev-ari_enabled = 1; + dev_info(dev-dev, ARI forwarding enabled.\n); This is user-visible, so my questions are: 1) Does this really add value for the user? Or is this just more noise? 2) Is this output string informative enough for the user? Thanks. /ac -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/6 v3] PCI: support SR-IOV capability
* Zhao, Yu [EMAIL PROTECTED]: +/** + * pci_iov_init - initialize device's SR-IOV capability + * @dev: the PCI device + * + * Returns 0 on success, or negative on failure. + * + * The major differences between Virtual Function and PCI device are: + * 1) the device with multiple bus numbers uses internal routing, so + *there is no explicit bridge device in this case. + * 2) Virtual Function memory spaces are designated by BARs encapsulated + *in the capability structure, and the BARs in Virtual Function PCI + *configuration space are read-only zero. + */ +int pci_iov_init(struct pci_dev *dev) +{ + int i; + int pos; + u32 pgsz; + u16 ctrl, total, initial, offset, stride; + struct pci_iov *iov; + struct resource *res; + + if (!dev-is_pcie || (dev-pcie_type != PCI_EXP_TYPE_RC_END + dev-pcie_type != PCI_EXP_TYPE_ENDPOINT)) + return -ENODEV; + + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_IOV); + if (!pos) + return -ENODEV; + + ctrl = pci_ari_enabled(dev) ? PCI_IOV_CTRL_ARI : 0; + pci_write_config_word(dev, pos + PCI_IOV_CTRL, ctrl); + ssleep(1); + + pci_read_config_word(dev, pos + PCI_IOV_TOTAL_VF, total); + pci_read_config_word(dev, pos + PCI_IOV_INITIAL_VF, initial); + pci_write_config_word(dev, pos + PCI_IOV_NUM_VF, initial); + pci_read_config_word(dev, pos + PCI_IOV_VF_OFFSET, offset); + pci_read_config_word(dev, pos + PCI_IOV_VF_STRIDE, stride); + if (!total || initial total || (initial !offset) || + (initial 1 !stride)) + return -EIO; + + pci_read_config_dword(dev, pos + PCI_IOV_SUP_PGSIZE, pgsz); + i = PAGE_SHIFT 12 ? PAGE_SHIFT - 12 : 0; + pgsz = ~((1 i) - 1); + if (!pgsz) + return -EIO; + + pgsz = ~(pgsz - 1); + pci_write_config_dword(dev, pos + PCI_IOV_SYS_PGSIZE, pgsz); + + iov = kzalloc(sizeof(*iov), GFP_KERNEL); + if (!iov) + return -ENOMEM; + + iov-dev = dev; + iov-cap = pos; + iov-totalvfs = total; + iov-initialvfs = initial; + iov-offset = offset; + iov-stride = stride; + iov-align = pgsz 12; + mutex_init(iov-mutex); + + for (i = 0; i PCI_IOV_NUM_BAR; i++) { + res = dev-resource + PCI_IOV_RESOURCES + i; + pos = iov-cap + PCI_IOV_BAR_0 + i * 4; + i += pci_read_base(dev, pci_bar_unknown, res, pos); + if (!res-flags) + continue; + res-flags = ~IORESOURCE_SIZEALIGN; + res-end = res-start + resource_size(res) * total - 1; + } + + dev-iov = iov; + dev_info(dev-dev, SR-IOV capability is initialized\n); Same questions here that I had for the ARI stuff. Does this dev_info add value, or is it more noise, and is this message informative enough? + + return 0; +} + +/** + * pci_iov_release - release resources used by SR-IOV capability + * @dev: the PCI device + */ +void pci_iov_release(struct pci_dev *dev) +{ + if (!dev-iov) + return; + + mutex_destroy(dev-iov-mutex); + kfree(dev-iov); + dev-iov = NULL; +} + +/** + * pci_iov_create_sysfs - create sysfs for SR-IOV capability + * @dev: the PCI device + */ +void pci_iov_create_sysfs(struct pci_dev *dev) +{ + int rc; + int i, j; + struct pci_iov *iov = dev-iov; + + if (!iov) + return; + + iov-ve = kzalloc(sizeof(*iov-ve) * iov-totalvfs, GFP_KERNEL); + if (!iov-ve) + return; + + for (i = 0; i iov-totalvfs; i++) { + iov-ve[i].vfn = i; + iov-ve[i].iov = iov; + } + + rc = kobject_init_and_add(iov-kobj, iov_ktype, + dev-dev.kobj, iov); + if (rc) + goto failed1; + + for (i = 0; i ARRAY_SIZE(iov_attr); i++) { + rc = sysfs_create_file(iov-kobj, iov_attr[i].attr); + if (rc) + goto failed2; + } + + for (i = 0; i iov-totalvfs; i++) { + sprintf(iov-ve[i].name, %d, i); + rc = kobject_init_and_add(iov-ve[i].kobj, iov_ktype, + iov-kobj, iov-ve[i].name); + if (rc) + goto failed3; + rc = sysfs_create_file(iov-ve[i].kobj, vf_attr.attr); + if (rc) { + kobject_put(iov-ve[i].kobj); + goto failed3; + } + } Do you want to emit a kobject_uevent here after success? Alternatively, have you investigated making these virtual functions into real struct device's? You get a lot of sysfs stuff for free if you do
Re: [PATCH 1/4 v2] PCI: introduce new base functions
* Zhao, Yu [EMAIL PROTECTED]: It can be PCI_BRIDGE_RESOURCES, because there may be some non-standard resources following PCI_ROM_RESOURCE and before PCI_BRIDGE_RESOURCES. For example, a standard PCI device has following resources: 0 - 5 BARs 6 ROM 7 - 10 Bridge After SR-IOV is enabled, it becomes 0 - 5 standard BARs 6 Rom 7 - 12 SR-IOV BARs 13 - 16 Bridge Ok, makes sense now; was that documented somewhere else, and I just missed it? Same as above, the PCI_BRIDGE_RES_END varies when some features is enabled or disabled. Ok, I must have missed this documentation too. Thank you very much for carefully reviewing these patches. I'd like to invite you to review next version again if it's convenient for you. Sure, you can Cc me next time too. Thanks. /ac -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/4 v2] PCI: support SR-IOV capability
* Zhao, Yu [EMAIL PROTECTED]: Support SR-IOV capability. By default, this feature is not enabled and the SR-IOV device behaves as traditional PCI device. After it's enabled, each Virtual Function's PCI configuration space can be accessed using its own Bus, Device and Function Number (Routing ID). Each Virtual Function also has PCI Memory Space, which is used to map its own register set. Signed-off-by: Yu Zhao [EMAIL PROTECTED] Signed-off-by: Eddie Dong [EMAIL PROTECTED] --- drivers/pci/Kconfig | 10 + drivers/pci/Makefile |2 + drivers/pci/iov.c| 555 ++ drivers/pci/pci.c| 14 +- drivers/pci/pci.h| 44 drivers/pci/probe.c |5 + include/linux/pci.h | 28 +++ include/linux/pci_regs.h | 20 ++ 8 files changed, 677 insertions(+), 1 deletions(-) create mode 100644 drivers/pci/iov.c diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index f43cc46..0a1fe01 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -57,3 +57,13 @@ config PCI_ARI default n help This enables PCI Alternative Routing-ID Interpretation. + +config PCI_IOV + bool PCI SR-IOV support + depends on PCI HOTPLUG + select PCI_MSI + select PCI_ARI + select HOTPLUG_PCI + default n + help + This allows device drivers to enable Single Root I/O Virtualization. I'd like to see this Kconfig help text enhanced too. Telling a user that PCI_IOV enables Single Root I/O Virtualization isn't that helpful, but explaining what SR-IOV actually gets you _is_ helpful. This option allows device drivers to enable Single Root I/O Virtualization. Each Virtual Function's PCI configuration space can be accessed using its own Bus, Device and Function Number (Routing ID). Each Virtual Function also has PCI Memory Space, which is used to map its own register set. Thanks. /ac -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4 v2] PCI: document the change
* Zhao, Yu [EMAIL PROTECTED]: +1. Overview + +1.1 What is SR-IOV + +Single Root I/O Virtualization (SR-IOV) is a PCI Express Extended +capability which makes one physical device appear as multiple virtual +devices. The physical device is referred to as Physical Function while +the virtual devices are referred to as Virtual Functions. Allocation +of Virtual Functions can be dynamically controlled by Physical Function +via registers encapsulated in the capability. By default, this feature +is not enabled and the Physical Function behaves as traditional PCIe +device. Once it's turned on, each Virtual Function's PCI configuration +space can be accessed by its own Bus, Device and Function Number (Routing +ID). And each Virtual Function also has PCI Memory Space, which is used +to map its register set. Virtual Function device driver operates on the +register set so it can be functional and appear as a real existing PCI +device. + +1.2 What is ARI + +Alternative Routing-ID Interpretation (ARI) allows a PCI Express Endpoint +to use its device number field as part of function number. Traditionally, +an Endpoint can only have 8 functions, and the device number of all +Endpoints is zero. With ARI enabled, an Endpoint can have up to 256 +functions by using device number in conjunction with function number to +indicate a function in the device. This is almost transparent to the Linux +kernel because the Linux kernel still can use 8-bit bus number field plus +8-bit devfn number field to locate a function. ARI is managed via the ARI +Forwarding bit in the Device Capabilities 2 register of the PCI Express +Capability on the Root Port or the Downstream Port and a new ARI Capability +on the Endpoint. + + +2. User Guide + +2.1 How can I manage SR-IOV + +If a device supports SR-IOV, then there should be some entires under entries +/sys/bus/pci/slots/. The names of the entires are :BB:DD.F-iov-, entries +where the first part (:BB:DD.F) is the domain, bus, device and function +number of the device, and the third part () is the index of a Virtual +Function. There are three files under the entry: power, param and address. So are you saying here that you will actually create sysfs files named: /sys/bus/pci/slots/:BB:DD.F-iov- We just spent some effort cleaning out this directory and getting human-readable names in /sys/bus/pci/slots/. The entries created there _should_ match the labelling on the physical chassis, assuming firmware tells us the right name. How will these particular slot entries be created? Do the individual hotplug drivers create them? Sorry, I guess I haven't read the patch series, just skimmed the documentation. I can go do that before asking any further dumb questions, but this little bit of information was a bit surprising to me. + - Writing 1 to the power will enable the Virtual Function, + and 0 will disable the Virtual Function; Reading it will get + status of the Virtual Function. + - Reading the address will get the bus, device and function + number of the Virtual Function. + - The param is the device specific parameters which may be + used by the Physical or Virtual Functions drivers. + +2.2 How can I use Virtual Functions + +Virtual Functions is treated as hot-plugged PCI devices in the kernel, +so they should be able to work in the same way as real PCI devices. +NOTE: Virtual Function device driver must be loaded to make it work. Ok, I'll go read the rest of the patch series; looks like you wrote a new virtual function driver. Thanks. /ac -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/4 v2] PCI: support SR-IOV capability
* Zhao, Yu [EMAIL PROTECTED]: Support SR-IOV capability. By default, this feature is not enabled and the SR-IOV device behaves as traditional PCI device. After it's enabled, each Virtual Function's PCI configuration space can be accessed using its own Bus, Device and Function Number (Routing ID). Each Virtual Function also has PCI Memory Space, which is used to map its own register set. Signed-off-by: Yu Zhao [EMAIL PROTECTED] Signed-off-by: Eddie Dong [EMAIL PROTECTED] --- drivers/pci/Kconfig | 10 + drivers/pci/Makefile |2 + drivers/pci/iov.c| 555 ++ drivers/pci/pci.c| 14 +- drivers/pci/pci.h| 44 drivers/pci/probe.c |5 + include/linux/pci.h | 28 +++ include/linux/pci_regs.h | 20 ++ 8 files changed, 677 insertions(+), 1 deletions(-) create mode 100644 drivers/pci/iov.c diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index f43cc46..0a1fe01 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -57,3 +57,13 @@ config PCI_ARI default n help This enables PCI Alternative Routing-ID Interpretation. + +config PCI_IOV + bool PCI SR-IOV support + depends on PCI HOTPLUG + select PCI_MSI + select PCI_ARI + select HOTPLUG_PCI + default n + help + This allows device drivers to enable Single Root I/O Virtualization. diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 96f2767..2dcefce 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -55,3 +55,5 @@ EXTRA_CFLAGS += -DDEBUG endif obj-$(CONFIG_PCI_ARI) += ari.o + +obj-$(CONFIG_PCI_IOV) += iov.o diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c new file mode 100644 index 000..0656655 --- /dev/null +++ b/drivers/pci/iov.c @@ -0,0 +1,555 @@ +/* + * drivers/pci/iov.c + * + * Copyright (C) 2008 Intel Corporation, Yu Zhao [EMAIL PROTECTED] + * + * PCI Express Single Root I/O Virtualization capability support. + */ + +#include linux/ctype.h +#include linux/string.h +#include linux/pci.h +#include linux/pci_hotplug.h +#include linux/delay.h +#include asm/page.h + +#include pci.h + + +#define PCI_IOV_SLOTNAME_LEN 24 + +#define notify(dev, event, id, param) ({ \ + dev-iov-cb ? dev-iov-cb(dev, event, id, param) : 0; \ +}) + + +struct virtfn_slot { + int id; + char name[PCI_IOV_SLOTNAME_LEN]; + struct pci_dev *dev; + struct list_head node; + struct hotplug_slot *slot; +}; + +static int enable_virtfn(struct hotplug_slot *); +static int disable_virtfn(struct hotplug_slot *); +static int set_virtfn_param(struct hotplug_slot *, const char *, int); +static int get_virtfn_param(struct hotplug_slot *, const char **); + +static struct hotplug_slot_ops virtfn_slot_ops = { + .owner = THIS_MODULE, + .enable_slot= enable_virtfn, + .disable_slot = disable_virtfn, + .set_param = set_virtfn_param, + .get_param = get_virtfn_param +}; + +static DEFINE_MUTEX(iov_lock); + + +static inline void get_addr(struct pci_dev *dev, int id, u8 *busnr, u8 *devfn) +{ + u16 addr; + + addr = (dev-bus-number 8) + dev-devfn + + dev-iov-offset + dev-iov-stride * id; + *busnr = addr 8; + *devfn = addr 0xff; +} + +static inline struct pci_bus *find_bus(struct pci_dev *dev, int busnr) +{ + struct pci_bus *bus; + + down_read(pci_bus_sem); + list_for_each_entry(bus, dev-bus-children, node) + if (bus-number == busnr) { + up_read(pci_bus_sem); + return bus; + } + up_read(pci_bus_sem); + + return NULL; +} + +static int alloc_virtfn(struct pci_dev *dev, int id) +{ + int i; + int rc; + u8 busnr, devfn; + unsigned long size; + struct pci_dev *new; + struct pci_bus *bus; + struct resource *res; + + get_addr(dev, id, busnr, devfn); + + new = alloc_pci_dev(); + if (!new) + return -ENOMEM; + + bus = find_bus(dev, busnr); + BUG_ON(!bus); + new-bus = bus; + new-sysdata = bus-sysdata; + new-dev.parent = dev-dev.parent; + new-dev.bus = dev-dev.bus; + new-devfn = devfn; + new-hdr_type = PCI_HEADER_TYPE_NORMAL; + new-multifunction = 0; + new-vendor = dev-vendor; + pci_read_config_word(dev, dev-iov-cap + PCI_IOV_VF_DID, new-device); + new-cfg_size = 4096; + new-error_state = pci_channel_io_normal; + new-pcie_type = PCI_EXP_TYPE_ENDPOINT; + new-dma_mask = 0x; + + dev_set_name(new-dev, %04x:%02x:%02x.%d, pci_domain_nr(bus), + busnr, PCI_SLOT(devfn), PCI_FUNC(devfn)); + + pci_read_config_byte(new, PCI_REVISION_ID, new-revision); + new-class = dev-class; + new-current_state =