Re: [PATCH 3/6 v3] PCI: support ARI capability

2008-09-30 Thread Alex Chiang
* Zhao, Yu [EMAIL PROTECTED]:
 Add Alternative Routing-ID Interpretation (ARI) support.
 
 Cc: Jesse Barnes [EMAIL PROTECTED]
 Cc: Randy Dunlap [EMAIL PROTECTED]
 Cc: Grant Grundler [EMAIL PROTECTED]
 Cc: Alex Chiang [EMAIL PROTECTED]
 Cc: Matthew Wilcox [EMAIL PROTECTED]
 Cc: Roland Dreier [EMAIL PROTECTED]
 Cc: Greg KH [EMAIL PROTECTED]
 Signed-off-by: Yu Zhao [EMAIL PROTECTED]
 
 ---
  drivers/pci/pci.c|   31 +++
  drivers/pci/pci.h|   12 
  drivers/pci/probe.c  |3 +++
  include/linux/pci.h  |1 +
  include/linux/pci_regs.h |   14 ++
  5 files changed, 61 insertions(+), 0 deletions(-)
 
 diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
 index 400d3b3..fe9efc4 100644
 --- a/drivers/pci/pci.c
 +++ b/drivers/pci/pci.c
 @@ -1260,6 +1260,37 @@ void pci_pm_init(struct pci_dev *dev)
 }
  }
 
 +/**
 + * pci_ari_init - turn on ARI forwarding if it's supported
 + * @dev: the PCI device
 + */
 +void pci_ari_init(struct pci_dev *dev)
 +{
 +   int pos;
 +   u32 cap;
 +   u16 ctrl;
 +
 +   if (!dev-is_pcie || (dev-pcie_type != PCI_EXP_TYPE_ROOT_PORT 
 +   dev-pcie_type != PCI_EXP_TYPE_DOWNSTREAM))
 +   return;
 +
 +   pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
 +   if (!pos)
 +   return;
 +
 +   pci_read_config_dword(dev, pos + PCI_EXP_DEVCAP2, cap);
 +
 +   if (!(cap  PCI_EXP_DEVCAP2_ARI))
 +   return;
 +
 +   pci_read_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
 +   ctrl |= PCI_EXP_DEVCTL2_ARI;
 +   pci_write_config_word(dev, pos + PCI_EXP_DEVCTL2, ctrl);
 +
 +   dev-ari_enabled = 1;
 +   dev_info(dev-dev, ARI forwarding enabled.\n);

This is user-visible, so my questions are:

1) Does this really add value for the user? Or is this
just more noise?

2) Is this output string informative enough for the user?

Thanks.

/ac

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/6 v3] PCI: support SR-IOV capability

2008-09-30 Thread Alex Chiang
* Zhao, Yu [EMAIL PROTECTED]:
 +/**
 + * pci_iov_init - initialize device's SR-IOV capability
 + * @dev: the PCI device
 + *
 + * Returns 0 on success, or negative on failure.
 + *
 + * The major differences between Virtual Function and PCI device are:
 + * 1) the device with multiple bus numbers uses internal routing, so
 + *there is no explicit bridge device in this case.
 + * 2) Virtual Function memory spaces are designated by BARs encapsulated
 + *in the capability structure, and the BARs in Virtual Function PCI
 + *configuration space are read-only zero.
 + */
 +int pci_iov_init(struct pci_dev *dev)
 +{
 +   int i;
 +   int pos;
 +   u32 pgsz;
 +   u16 ctrl, total, initial, offset, stride;
 +   struct pci_iov *iov;
 +   struct resource *res;
 +
 +   if (!dev-is_pcie || (dev-pcie_type != PCI_EXP_TYPE_RC_END 
 +   dev-pcie_type != PCI_EXP_TYPE_ENDPOINT))
 +   return -ENODEV;
 +
 +   pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_IOV);
 +   if (!pos)
 +   return -ENODEV;
 +
 +   ctrl = pci_ari_enabled(dev) ? PCI_IOV_CTRL_ARI : 0;
 +   pci_write_config_word(dev, pos + PCI_IOV_CTRL, ctrl);
 +   ssleep(1);
 +
 +   pci_read_config_word(dev, pos + PCI_IOV_TOTAL_VF, total);
 +   pci_read_config_word(dev, pos + PCI_IOV_INITIAL_VF, initial);
 +   pci_write_config_word(dev, pos + PCI_IOV_NUM_VF, initial);
 +   pci_read_config_word(dev, pos + PCI_IOV_VF_OFFSET, offset);
 +   pci_read_config_word(dev, pos + PCI_IOV_VF_STRIDE, stride);
 +   if (!total || initial  total || (initial  !offset) ||
 +   (initial  1  !stride))
 +   return -EIO;
 +
 +   pci_read_config_dword(dev, pos + PCI_IOV_SUP_PGSIZE, pgsz);
 +   i = PAGE_SHIFT  12 ? PAGE_SHIFT - 12 : 0;
 +   pgsz = ~((1  i) - 1);
 +   if (!pgsz)
 +   return -EIO;
 +
 +   pgsz = ~(pgsz - 1);
 +   pci_write_config_dword(dev, pos + PCI_IOV_SYS_PGSIZE, pgsz);
 +
 +   iov = kzalloc(sizeof(*iov), GFP_KERNEL);
 +   if (!iov)
 +   return -ENOMEM;
 +
 +   iov-dev = dev;
 +   iov-cap = pos;
 +   iov-totalvfs = total;
 +   iov-initialvfs = initial;
 +   iov-offset = offset;
 +   iov-stride = stride;
 +   iov-align = pgsz  12;
 +   mutex_init(iov-mutex);
 +
 +   for (i = 0; i  PCI_IOV_NUM_BAR; i++) {
 +   res = dev-resource + PCI_IOV_RESOURCES + i;
 +   pos = iov-cap + PCI_IOV_BAR_0 + i * 4;
 +   i += pci_read_base(dev, pci_bar_unknown, res, pos);
 +   if (!res-flags)
 +   continue;
 +   res-flags = ~IORESOURCE_SIZEALIGN;
 +   res-end = res-start + resource_size(res) * total - 1;
 +   }
 +
 +   dev-iov = iov;
 +   dev_info(dev-dev, SR-IOV capability is initialized\n);

Same questions here that I had for the ARI stuff. Does this
dev_info add value, or is it more noise, and is this message
informative enough? 

 +
 +   return 0;
 +}
 +
 +/**
 + * pci_iov_release - release resources used by SR-IOV capability
 + * @dev: the PCI device
 + */
 +void pci_iov_release(struct pci_dev *dev)
 +{
 +   if (!dev-iov)
 +   return;
 +
 +   mutex_destroy(dev-iov-mutex);
 +   kfree(dev-iov);
 +   dev-iov = NULL;
 +}
 +
 +/**
 + * pci_iov_create_sysfs - create sysfs for SR-IOV capability
 + * @dev: the PCI device
 + */
 +void pci_iov_create_sysfs(struct pci_dev *dev)
 +{
 +   int rc;
 +   int i, j;
 +   struct pci_iov *iov = dev-iov;
 +
 +   if (!iov)
 +   return;
 +
 +   iov-ve = kzalloc(sizeof(*iov-ve) * iov-totalvfs, GFP_KERNEL);
 +   if (!iov-ve)
 +   return;
 +
 +   for (i = 0; i  iov-totalvfs; i++) {
 +   iov-ve[i].vfn = i;
 +   iov-ve[i].iov = iov;
 +   }
 +
 +   rc = kobject_init_and_add(iov-kobj, iov_ktype,
 +   dev-dev.kobj, iov);
 +   if (rc)
 +   goto failed1;
 +
 +   for (i = 0; i  ARRAY_SIZE(iov_attr); i++) {
 +   rc = sysfs_create_file(iov-kobj, iov_attr[i].attr);
 +   if (rc)
 +   goto failed2;
 +   }
 +
 +   for (i = 0; i  iov-totalvfs; i++) {
 +   sprintf(iov-ve[i].name, %d, i);
 +   rc = kobject_init_and_add(iov-ve[i].kobj, iov_ktype,
 +   iov-kobj, iov-ve[i].name);
 +   if (rc)
 +   goto failed3;
 +   rc = sysfs_create_file(iov-ve[i].kobj, vf_attr.attr);
 +   if (rc) {
 +   kobject_put(iov-ve[i].kobj);
 +   goto failed3;
 +   }
 +   }

Do you want to emit a kobject_uevent here after success?

Alternatively, have you investigated making these virtual
functions into real struct device's? You get a lot of sysfs stuff
for free if you do 

Re: [PATCH 1/4 v2] PCI: introduce new base functions

2008-09-10 Thread Alex Chiang
* Zhao, Yu [EMAIL PROTECTED]:
 
 It can be PCI_BRIDGE_RESOURCES, because there may be some
 non-standard resources following PCI_ROM_RESOURCE and before
 PCI_BRIDGE_RESOURCES.
 
 For example, a standard PCI device has following resources:
   0 - 5   BARs
   6   ROM
   7 - 10  Bridge
 
 After SR-IOV is enabled, it becomes
   0 - 5   standard BARs
   6   Rom
  7 - 12  SR-IOV BARs
   13 - 16 Bridge

Ok, makes sense now; was that documented somewhere else, and I
just missed it?

 Same as above, the PCI_BRIDGE_RES_END varies when some features
 is enabled or disabled.

Ok, I must have missed this documentation too.

 Thank you very much for carefully reviewing these patches. I'd
 like to invite you to review next version again if it's
 convenient for you.

Sure, you can Cc me next time too.

Thanks.

/ac

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4 v2] PCI: support SR-IOV capability

2008-09-01 Thread Alex Chiang
* Zhao, Yu [EMAIL PROTECTED]:
 Support SR-IOV capability. By default, this feature is not enabled and the 
 SR-IOV device behaves as traditional PCI device. After it's enabled, each 
 Virtual Function's PCI configuration space can be accessed using its own Bus, 
 Device and Function Number (Routing ID). Each Virtual Function also has PCI 
 Memory Space, which is used to map its own register set.
 
 Signed-off-by: Yu Zhao [EMAIL PROTECTED]
 Signed-off-by: Eddie Dong [EMAIL PROTECTED]
 
 ---
  drivers/pci/Kconfig  |   10 +
  drivers/pci/Makefile |2 +
  drivers/pci/iov.c|  555 
 ++
  drivers/pci/pci.c|   14 +-
  drivers/pci/pci.h|   44 
  drivers/pci/probe.c  |5 +
  include/linux/pci.h  |   28 +++
  include/linux/pci_regs.h |   20 ++
  8 files changed, 677 insertions(+), 1 deletions(-)
  create mode 100644 drivers/pci/iov.c
 
 diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
 index f43cc46..0a1fe01 100644
 --- a/drivers/pci/Kconfig
 +++ b/drivers/pci/Kconfig
 @@ -57,3 +57,13 @@ config PCI_ARI
   default n
   help
 This enables PCI Alternative Routing-ID Interpretation.
 +
 +config PCI_IOV
 + bool PCI SR-IOV support
 + depends on PCI  HOTPLUG
 + select PCI_MSI
 + select PCI_ARI
 + select HOTPLUG_PCI
 + default n
 + help
 +   This allows device drivers to enable Single Root I/O Virtualization.

I'd like to see this Kconfig help text enhanced too. Telling a
user that PCI_IOV enables Single Root I/O Virtualization isn't
that helpful, but explaining what SR-IOV actually gets you _is_
helpful.

This option allows device drivers to enable Single Root I/O
Virtualization.  Each Virtual Function's PCI configuration
space can be accessed using its own Bus, Device and Function
Number (Routing ID). Each Virtual Function also has PCI Memory
Space, which is used to map its own register set.

Thanks.

/ac

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4 v2] PCI: document the change

2008-09-01 Thread Alex Chiang
* Zhao, Yu [EMAIL PROTECTED]:
 +1. Overview
 +
 +1.1 What is SR-IOV
 +
 +Single Root I/O Virtualization (SR-IOV) is a PCI Express Extended
 +capability which makes one physical device appear as multiple virtual
 +devices. The physical device is referred to as Physical Function while
 +the virtual devices are referred to as Virtual Functions. Allocation
 +of Virtual Functions can be dynamically controlled by Physical Function
 +via registers encapsulated in the capability. By default, this feature
 +is not enabled and the Physical Function behaves as traditional PCIe
 +device. Once it's turned on, each Virtual Function's PCI configuration
 +space can be accessed by its own Bus, Device and Function Number (Routing
 +ID). And each Virtual Function also has PCI Memory Space, which is used
 +to map its register set. Virtual Function device driver operates on the
 +register set so it can be functional and appear as a real existing PCI
 +device.
 +
 +1.2 What is ARI
 +
 +Alternative Routing-ID Interpretation (ARI) allows a PCI Express Endpoint
 +to use its device number field as part of function number. Traditionally,
 +an Endpoint can only have 8 functions, and the device number of all
 +Endpoints is zero. With ARI enabled, an Endpoint can have up to 256
 +functions by using device number in conjunction with function number to
 +indicate a function in the device. This is almost transparent to the Linux
 +kernel because the Linux kernel still can use 8-bit bus number field plus
 +8-bit devfn number field to locate a function. ARI is managed via the ARI
 +Forwarding bit in the Device Capabilities 2 register of the PCI Express
 +Capability on the Root Port or the Downstream Port and a new ARI Capability
 +on the Endpoint.
 +
 +
 +2. User Guide
 +
 +2.1 How can I manage SR-IOV
 +
 +If a device supports SR-IOV, then there should be some entires under
  entries
 +/sys/bus/pci/slots/. The names of the entires are :BB:DD.F-iov-,
 entries
 +where the first part (:BB:DD.F) is the domain, bus, device and function
 +number of the device, and the third part () is the index of a Virtual
 +Function. There are three files under the entry: power, param and address.

So are you saying here that you will actually create sysfs files
named:

/sys/bus/pci/slots/:BB:DD.F-iov-

We just spent some effort cleaning out this directory and getting
human-readable names in /sys/bus/pci/slots/. The entries created
there _should_ match the labelling on the physical chassis,
assuming firmware tells us the right name.

How will these particular slot entries be created? Do the
individual hotplug drivers create them?

Sorry, I guess I haven't read the patch series, just skimmed the
documentation. I can go do that before asking any further dumb
questions, but this little bit of information was a bit
surprising to me.

 + - Writing 1 to the power will enable the Virtual Function,
 + and 0 will disable the Virtual Function; Reading it will get
 + status of the Virtual Function.
 + - Reading the address will get the bus, device and function
 + number of the Virtual Function.
 + - The param is the device specific parameters which may be
 + used by the Physical or Virtual Functions drivers.
 +
 +2.2 How can I use Virtual Functions
 +
 +Virtual Functions is treated as hot-plugged PCI devices in the kernel,
 +so they should be able to work in the same way as real PCI devices.
 +NOTE: Virtual Function device driver must be loaded to make it work.

Ok, I'll go read the rest of the patch series; looks like you
wrote a new virtual function driver.

Thanks.

/ac

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4 v2] PCI: support SR-IOV capability

2008-09-01 Thread Alex Chiang
* Zhao, Yu [EMAIL PROTECTED]:
 Support SR-IOV capability. By default, this feature is not enabled and the 
 SR-IOV device behaves as traditional PCI device. After it's enabled, each 
 Virtual Function's PCI configuration space can be accessed using its own Bus, 
 Device and Function Number (Routing ID). Each Virtual Function also has PCI 
 Memory Space, which is used to map its own register set.
 
 Signed-off-by: Yu Zhao [EMAIL PROTECTED]
 Signed-off-by: Eddie Dong [EMAIL PROTECTED]
 
 ---
  drivers/pci/Kconfig  |   10 +
  drivers/pci/Makefile |2 +
  drivers/pci/iov.c|  555 
 ++
  drivers/pci/pci.c|   14 +-
  drivers/pci/pci.h|   44 
  drivers/pci/probe.c  |5 +
  include/linux/pci.h  |   28 +++
  include/linux/pci_regs.h |   20 ++
  8 files changed, 677 insertions(+), 1 deletions(-)
  create mode 100644 drivers/pci/iov.c
 
 diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
 index f43cc46..0a1fe01 100644
 --- a/drivers/pci/Kconfig
 +++ b/drivers/pci/Kconfig
 @@ -57,3 +57,13 @@ config PCI_ARI
   default n
   help
 This enables PCI Alternative Routing-ID Interpretation.
 +
 +config PCI_IOV
 + bool PCI SR-IOV support
 + depends on PCI  HOTPLUG
 + select PCI_MSI
 + select PCI_ARI
 + select HOTPLUG_PCI
 + default n
 + help
 +   This allows device drivers to enable Single Root I/O Virtualization.
 diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
 index 96f2767..2dcefce 100644
 --- a/drivers/pci/Makefile
 +++ b/drivers/pci/Makefile
 @@ -55,3 +55,5 @@ EXTRA_CFLAGS += -DDEBUG
  endif
  
  obj-$(CONFIG_PCI_ARI) += ari.o
 +
 +obj-$(CONFIG_PCI_IOV) += iov.o
 diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
 new file mode 100644
 index 000..0656655
 --- /dev/null
 +++ b/drivers/pci/iov.c
 @@ -0,0 +1,555 @@
 +/*
 + * drivers/pci/iov.c
 + *
 + * Copyright (C) 2008 Intel Corporation, Yu Zhao [EMAIL PROTECTED]
 + *
 + * PCI Express Single Root I/O Virtualization capability support.
 + */
 +
 +#include linux/ctype.h
 +#include linux/string.h
 +#include linux/pci.h
 +#include linux/pci_hotplug.h
 +#include linux/delay.h
 +#include asm/page.h
 +
 +#include pci.h
 +
 +
 +#define PCI_IOV_SLOTNAME_LEN 24
 +
 +#define notify(dev, event, id, param) ({ \
 + dev-iov-cb ? dev-iov-cb(dev, event, id, param) : 0; \
 +})
 +
 +
 +struct virtfn_slot {
 + int id;
 + char name[PCI_IOV_SLOTNAME_LEN];
 + struct pci_dev *dev;
 + struct list_head node;
 + struct hotplug_slot *slot;
 +};
 +
 +static int enable_virtfn(struct hotplug_slot *);
 +static int disable_virtfn(struct hotplug_slot *);
 +static int set_virtfn_param(struct hotplug_slot *, const char *, int);
 +static int get_virtfn_param(struct hotplug_slot *, const char **);
 +
 +static struct hotplug_slot_ops virtfn_slot_ops = {
 + .owner  = THIS_MODULE,
 + .enable_slot= enable_virtfn,
 + .disable_slot   = disable_virtfn,
 + .set_param  = set_virtfn_param,
 + .get_param  = get_virtfn_param
 +};
 +
 +static DEFINE_MUTEX(iov_lock);
 +
 +
 +static inline void get_addr(struct pci_dev *dev, int id, u8 *busnr, u8 
 *devfn)
 +{
 + u16 addr;
 +
 + addr = (dev-bus-number  8) + dev-devfn +
 +   dev-iov-offset + dev-iov-stride * id;
 + *busnr = addr  8;
 + *devfn = addr  0xff;
 +}
 +
 +static inline struct pci_bus *find_bus(struct pci_dev *dev, int busnr)
 +{
 + struct pci_bus *bus;
 +
 + down_read(pci_bus_sem);
 + list_for_each_entry(bus, dev-bus-children, node)
 + if (bus-number == busnr) {
 + up_read(pci_bus_sem);
 + return bus;
 + }
 + up_read(pci_bus_sem);
 +
 + return NULL;
 +}
 +
 +static int alloc_virtfn(struct pci_dev *dev, int id)
 +{
 + int i;
 + int rc;
 + u8 busnr, devfn;
 + unsigned long size;
 + struct pci_dev *new;
 + struct pci_bus *bus;
 + struct resource *res;
 +
 + get_addr(dev, id, busnr, devfn);
 +
 + new = alloc_pci_dev();
 + if (!new)
 + return -ENOMEM;
 +
 + bus = find_bus(dev, busnr);
 + BUG_ON(!bus);
 + new-bus = bus;
 + new-sysdata = bus-sysdata;
 + new-dev.parent = dev-dev.parent;
 + new-dev.bus = dev-dev.bus;
 + new-devfn = devfn;
 + new-hdr_type = PCI_HEADER_TYPE_NORMAL;
 + new-multifunction = 0;
 + new-vendor = dev-vendor;
 + pci_read_config_word(dev, dev-iov-cap + PCI_IOV_VF_DID, new-device);
 + new-cfg_size = 4096;
 + new-error_state = pci_channel_io_normal;
 + new-pcie_type = PCI_EXP_TYPE_ENDPOINT;
 + new-dma_mask = 0x;
 +
 + dev_set_name(new-dev, %04x:%02x:%02x.%d, pci_domain_nr(bus),
 +  busnr, PCI_SLOT(devfn), PCI_FUNC(devfn));
 +
 + pci_read_config_byte(new, PCI_REVISION_ID, new-revision);
 + new-class = dev-class;
 + new-current_state =