[PATCH 1/3] VFIO: Architecture dependent VFIO device operations

2013-03-15 Thread Gavin Shan
Some architectures like PPC, especially the PowerNV platform, need to
do additional operations while adding or removing VFIO devices to
or from the VFIO bus. The patch adds weak functions that are called on
open, release or ioctl of the specific VFIO device. Those functions can
be overridden by individual architectures if necessary.

Signed-off-by: Gavin Shan 
---
 drivers/vfio/pci/vfio_pci.c |   42 +++---
 include/linux/vfio.h|7 ++-
 2 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 8189cb6..1a53e77 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -143,32 +143,51 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
pci_restore_state(pdev);
 }
 
+void __weak vfio_pci_arch_release(struct pci_dev *pdev)
+{
+   return;
+}
+
 static void vfio_pci_release(void *device_data)
 {
struct vfio_pci_device *vdev = device_data;
 
-   if (atomic_dec_and_test(&vdev->refcnt))
+   if (atomic_dec_and_test(&vdev->refcnt)) {
+   vfio_pci_arch_release(vdev->pdev);
+
vfio_pci_disable(vdev);
+   }
 
module_put(THIS_MODULE);
 }
 
+int __weak vfio_pci_arch_open(struct pci_dev *pdev)
+{
+   return 0;
+}
+
 static int vfio_pci_open(void *device_data)
 {
struct vfio_pci_device *vdev = device_data;
+   int ret;
 
if (!try_module_get(THIS_MODULE))
return -ENODEV;
 
if (atomic_inc_return(&vdev->refcnt) == 1) {
-   int ret = vfio_pci_enable(vdev);
-   if (ret) {
-   module_put(THIS_MODULE);
-   return ret;
-   }
+   ret = vfio_pci_arch_open(vdev->pdev);
+   if (ret)
+   goto fail;
+
+   ret = vfio_pci_enable(vdev);
+   if (ret)
+   goto fail;
}
 
return 0;
+fail:
+   module_put(THIS_MODULE);
+   return ret;
 }
 
 static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
@@ -206,6 +225,12 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device 
*vdev, int irq_type)
return 0;
 }
 
+long __weak vfio_pci_arch_ioctl(struct pci_dev *pdev,
+   unsigned int cmd, unsigned long arg)
+{
+   return -ENOTTY;
+}
+
 static long vfio_pci_ioctl(void *device_data,
   unsigned int cmd, unsigned long arg)
 {
@@ -374,9 +399,12 @@ static long vfio_pci_ioctl(void *device_data,
 
return ret;
 
-   } else if (cmd == VFIO_DEVICE_RESET)
+   } else if (cmd == VFIO_DEVICE_RESET) {
return vdev->reset_works ?
pci_reset_function(vdev->pdev) : -EINVAL;
+   } else {
+   return vfio_pci_arch_ioctl(vdev->pdev, cmd, arg);
+   }
 
return -ENOTTY;
 }
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index ab9e862..a991c39 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -11,9 +11,9 @@
 #ifndef VFIO_H
 #define VFIO_H
 
-
 #include 
 #include 
+#include 
 #include 
 
 /**
@@ -40,6 +40,11 @@ struct vfio_device_ops {
int (*mmap)(void *device_data, struct vm_area_struct *vma);
 };
 
+extern int vfio_pci_arch_open(struct pci_dev *pdev);
+extern long vfio_pci_arch_ioctl(struct pci_dev *pdev,
+   unsigned int cmd,
+   unsigned long arg);
+extern void vfio_pci_arch_release(struct pci_dev *pdev);
 extern int vfio_add_group_dev(struct device *dev,
  const struct vfio_device_ops *ops,
  void *device_data);
-- 
1.7.5.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/3] VFIO change for EEH support

2013-03-15 Thread Gavin Shan
EEH (Enhanced Error Handling) is one of the RAS features on IBM Power
machines. In order to support EEH, VFIO needs some modifications,
which this patchset addresses. Firstly, the address (domain:bus:slot:function)
of passed PCI devices looks quite different from host and guest perspectives.
So we have to maintain the address mapping in the host so that EEH can
direct the EEH errors from the guest to the proper PCI device. Unfortunately, it
seems that the VFIO implementation doesn't include the mechanism yet. On
the other hand, it's entirely the business of individual platforms. So I introduced
some weak functions in the VFIO driver and individual platforms can override
them to figure out more information that the platform needs. Apart from that,
the last patch [3/3] changes the current behavior of accessing uncovered
config space for a specific PCI device.

The patchset is expected to be applied after Alexey's patchset (supporting
VFIO on PowerNV platform). Besides, there is a patchset based on it queued
in my personal tree for EEH core to support PowerKVM guest. With all of
them (Alexey's patchset, this patchset, EEH core patchset), I can successfully
pass a PCI device to the guest and recover it from EEH errors.

drivers/vfio/pci/vfio_pci.c|   42 ++--
drivers/vfio/pci/vfio_pci_config.c |   31 +-
include/linux/vfio.h   |7 +-
include/uapi/linux/vfio.h  |   16 +
4 files changed, 77 insertions(+), 19 deletions(-)

Thanks,
Gavin

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] VFIO: VFIO_DEVICE_SET_ADDR_MAPPING command

2013-03-15 Thread Gavin Shan
The address (domain/bus/slot/function) of the passed PCI device
looks quite different from perspective of host and guest. Some
architectures like PPC need to setup the mapping in host. The patch
introduces additional VFIO device IOCTL command to address that.

Signed-off-by: Gavin Shan 
---
 include/uapi/linux/vfio.h |   16 
 1 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 6e58d9b..ecc4f38 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -289,6 +289,22 @@ struct vfio_irq_set {
  */
 #define VFIO_DEVICE_RESET  _IO(VFIO_TYPE, VFIO_BASE + 11)
 
+/**
+ * VFIO_DEVICE_SET_ADDR_MAPPING - _IO(VFIO_TYPE, VFIO_BASE + 12)
+ *
+ * The address, which comprised of domain/bus/slot/function looks
+ * different between host and guest. We need to setup the mapping
+ * in host for some architectures like PPC so that the passed PCI
+ * devices could support RTAS smoothly.
+ */
+struct vfio_addr_mapping {
+   __u64 buid;
+   __u8  bus;
+   __u8  slot;
+   __u8  func;
+};
+#define VFIO_DEVICE_SET_ADDR_MAPPING   _IO(VFIO_TYPE, VFIO_BASE + 12)
+
 /*
  * The VFIO-PCI bus driver makes use of the following fixed region and
  * IRQ index mapping.  Unimplemented regions return a size of zero.
-- 
1.7.5.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] VFIO: Direct access config reg without capability

2013-03-15 Thread Gavin Shan
The config registers in [0, 0x40] are supported by VFIO. Apart
from that, the other config registers should be covered by a PCI or
PCIe capability. However, there might be some PCI devices (be2net)
which have config registers (0x7c) out of [0, 0x40] that don't have a
corresponding PCI or PCIe capability. VFIO will return 0x0 on reading
those registers and writes are dropped. This caused the be2net driver
to fail to load because 0x0 was returned from its config register 0x7c.

The patch changes the behaviour so that those config registers that are
out of [0, 0x40] and don't have a corresponding PCI or PCIe capability
will be accessed directly.

Signed-off-by: Gavin Shan 
---
 drivers/vfio/pci/vfio_pci_config.c |   31 ---
 1 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_config.c 
b/drivers/vfio/pci/vfio_pci_config.c
index 964ff22..5ea3afb 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -1471,18 +1471,27 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_device 
*vdev, char __user *buf,
 
cap_id = vdev->pci_config_map[*ppos / 4];
 
+   /*
+* Some PCI device config registers might not be coverred by
+* capability and useful. We will enable direct access to
+* those registers.
+*/
if (cap_id == PCI_CAP_ID_INVALID) {
-   if (iswrite)
-   return ret; /* drop */
-
-   /*
-* Per PCI spec 3.0, section 6.1, reads from reserved and
-* unimplemented registers return 0
-*/
-   if (copy_to_user(buf, &val, count))
-   return -EFAULT;
-
-   return ret;
+   if (iswrite) {
+   if (copy_from_user(&val, buf, count))
+   return -EFAULT;
+   ret = vfio_user_config_write(vdev->pdev, (int)(*ppos),
+val, count);
+   return ret ? ret : count;
+   } else {
+   ret = vfio_user_config_read(vdev->pdev, (int)(*ppos),
+   &val, count);
+   if (ret)
+   return ret;
+   if (copy_to_user(buf, &val, count))
+   return -EFAULT;
+   return count;
+   }
}
 
/*
-- 
1.7.5.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] drivers/vfio: Support IOMMU group for EEH operations

2015-09-17 Thread Gavin Shan
Currently, EEH module works based on the assumption that every
container has only one attached IOMMU group. It's not true any
more. So the userland has to specify the IOMMU group (PE) to
which the requested EEH operation is applied.

This exposes "v2" interface for the userland to specify IOMMU
group (PE) ID when requesting EEH operation.

Signed-off-by: Gavin Shan 
---
 drivers/vfio/vfio_iommu_spapr_tce.c | 51 -
 drivers/vfio/vfio_spapr_eeh.c   | 39 
 include/linux/vfio.h|  7 ++---
 include/uapi/linux/vfio.h   |  3 +++
 4 files changed, 75 insertions(+), 25 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c 
b/drivers/vfio/vfio_iommu_spapr_tce.c
index 812b43b..f85bde7 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -724,7 +724,8 @@ static long tce_iommu_ioctl(void *iommu_data,
ret = 1;
break;
default:
-   ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
+   ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd,
+arg, NULL, 0);
break;
}
 
@@ -953,17 +954,55 @@ static long tce_iommu_ioctl(void *iommu_data,
return 0;
 
case VFIO_EEH_PE_OP: {
-   struct tce_iommu_group *tcegrp;
-   int eeh_enabled;
+   struct tce_iommu_group *tmp, *tcegrp;
+   struct vfio_eeh_pe_op op;
+   int enabled, flag;
+
+   enabled = vfio_spapr_pci_eeh_enabled();
+   if (enabled == VFIO_EEH_DISABLED)
+   return -ENOTTY;
 
-   eeh_enabled = vfio_spapr_pci_eeh_enabled();
-   if (eeh_enabled == VFIO_EEH_DISABLED)
+   /* Get the specified version */
+   minsz = offsetofend(struct vfio_eeh_pe_op, flags);
+   if (copy_from_user(&op, (void __user *)arg, minsz))
+   return -EFAULT;
+   flag = (op.flags & VFIO_EEH_ENABLED_MASK);
+   if (flag > enabled)
return -ENOTTY;
+   else if (flag == VFIO_EEH_DISABLED)
+   flag = VFIO_EEH_ENABLED_V1;
+
+   if (flag == VFIO_EEH_ENABLED_V1)
+   minsz = offsetofend(struct vfio_eeh_pe_op, op);
+   else if (flag == VFIO_EEH_ENABLED_V2)
+   minsz = offsetofend(struct vfio_eeh_pe_op, groupid);
+   if (copy_from_user(&op, (void __user *)arg, minsz))
+   return -EFAULT;
+
+   if (op.argsz < minsz)
+   return -EINVAL;
+
+   if (flag == VFIO_EEH_ENABLED_V2) {
+   tcegrp = NULL;
+   list_for_each_entry(tmp, &container->group_list, next) {
+   if (tmp->grp &&
+   iommu_group_id(tmp->grp) == op.groupid) {
+   tcegrp = tmp;
+   break;
+   }
+   }
+
+   if (!tcegrp)
+   return -ENODEV;
+
+   return vfio_spapr_iommu_eeh_ioctl(tcegrp->grp,
+   cmd, arg, &op, flag);
+   }
 
ret = 0;
list_for_each_entry(tcegrp, &container->group_list, next) {
ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp,
-   cmd, arg);
+   cmd, arg, &op, flag);
if (ret)
return ret;
}
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index d208d77..e77dcb8 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -20,7 +20,7 @@
 
 int vfio_spapr_pci_eeh_enabled(void)
 {
-   return VFIO_EEH_ENABLED_V1;
+   return VFIO_EEH_ENABLED_V2;
 }
 EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_enabled);
 
@@ -38,11 +38,12 @@ void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
 EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release);
 
 long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
-   unsigned int cmd, unsigned long arg)
+   unsigned int cmd, unsigned long arg,
+   void *parm, int flag)
 {
struct eeh_pe *pe;
-   struct vfio_eeh_pe_op op;
-   unsigned long minsz;
+   struct vfio_eeh_pe_op *op;
+   unsigned long src, dst, len;
long ret = -EINVAL;
 
switch (cmd) {
@@ -54,17 +55,12 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *

[PATCH 1/2] drivers/vfio: Support EEH API revision

2015-09-17 Thread Gavin Shan
This extends the return value from container's IOCTL command
(VFIO_CHECK_EXTENSION + VFIO_EEH) to EEH API revision. Also,
extra check is applied to return -ENOTTY if EEH functionality
is disabled in vfio_spapr_iommu_eeh_ioctl().

Signed-off-by: Gavin Shan 
---
 drivers/vfio/vfio_iommu_spapr_tce.c | 5 +
 drivers/vfio/vfio_spapr_eeh.c   | 9 -
 include/linux/vfio.h| 6 ++
 include/uapi/linux/vfio.h   | 3 +++
 4 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c 
b/drivers/vfio/vfio_iommu_spapr_tce.c
index 0582b72..812b43b 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -954,6 +954,11 @@ static long tce_iommu_ioctl(void *iommu_data,
 
case VFIO_EEH_PE_OP: {
struct tce_iommu_group *tcegrp;
+   int eeh_enabled;
+
+   eeh_enabled = vfio_spapr_pci_eeh_enabled();
+   if (eeh_enabled == VFIO_EEH_DISABLED)
+   return -ENOTTY;
 
ret = 0;
list_for_each_entry(tcegrp, &container->group_list, next) {
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 38edeb4..d208d77 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -18,6 +18,12 @@
 #define DRIVER_AUTHOR  "Gavin Shan, IBM Corporation"
 #define DRIVER_DESC"VFIO IOMMU SPAPR EEH"
 
+int vfio_spapr_pci_eeh_enabled(void)
+{
+   return VFIO_EEH_ENABLED_V1;
+}
+EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_enabled);
+
 /* We might build address mapping here for "fast" path later */
 void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
 {
@@ -42,7 +48,8 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
switch (cmd) {
case VFIO_CHECK_EXTENSION:
if (arg == VFIO_EEH)
-   ret = eeh_enabled() ? 1 : 0;
+   ret = eeh_enabled() ? vfio_spapr_pci_eeh_enabled() :
+ VFIO_EEH_DISABLED;
else
ret = 0;
break;
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index ddb4409..ff036ca 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -91,12 +91,18 @@ extern long vfio_external_check_extension(struct vfio_group 
*group,
 
 struct pci_dev;
 #ifdef CONFIG_EEH
+extern int vfio_spapr_pci_eeh_enabled(void);
 extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
 extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
 extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
   unsigned int cmd,
   unsigned long arg);
 #else
+static inline int vfio_spapr_pci_eeh_enabled(void)
+{
+   return VFIO_EEH_DISABLED;
+}
+
 static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
 {
 }
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 9fd7b5d..74f5b8b 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -495,6 +495,9 @@ struct vfio_iommu_spapr_tce_info {
  * - configure PE;
  * - inject EEH error.
  */
+#define VFIO_EEH_DISABLED  0
+#define VFIO_EEH_ENABLED_V11
+
 struct vfio_eeh_pe_err {
__u32 type;
__u32 func;
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/2] VFIO: Accept IOMMU group (PE) ID

2015-09-17 Thread Gavin Shan
This allows to accept IOMMU group (PE) ID from the parameter from userland
when handling EEH operation so that the operation only affects the target
IOMMU group (PE). If the IOMMU group (PE) ID in the parameter from userland
is invalid, all IOMMU groups (PEs) attached to the specified container are
affected as before.

Gavin Shan (2):
  drivers/vfio: Support EEH API revision
  drivers/vfio: Support IOMMU group for EEH operations

 drivers/vfio/vfio_iommu_spapr_tce.c | 50 ++---
 drivers/vfio/vfio_spapr_eeh.c   | 46 ++
 include/linux/vfio.h| 13 +++---
 include/uapi/linux/vfio.h   |  6 +
 4 files changed, 93 insertions(+), 22 deletions(-)

-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/2] VFIO: Accept IOMMU group (PE) ID

2015-09-21 Thread Gavin Shan
On Mon, Sep 21, 2015 at 11:42:28AM +1000, David Gibson wrote:
>On Sat, Sep 19, 2015 at 04:22:47PM +1000, David Gibson wrote:
>> On Fri, Sep 18, 2015 at 09:47:32AM -0600, Alex Williamson wrote:
>> > On Fri, 2015-09-18 at 16:24 +1000, Gavin Shan wrote:
>> > > This allows to accept IOMMU group (PE) ID from the parameter from 
>> > > userland
>> > > when handling EEH operation so that the operation only affects the target
>> > > IOMMU group (PE). If the IOMMU group (PE) ID in the parameter from 
>> > > userland
>> > > is invalid, all IOMMU groups (PEs) attached to the specified container 
>> > > are
>> > > affected as before.
>> > > 
>> > > Gavin Shan (2):
>> > >   drivers/vfio: Support EEH API revision
>> > >   drivers/vfio: Support IOMMU group for EEH operations
>> > > 
>> > >  drivers/vfio/vfio_iommu_spapr_tce.c | 50 
>> > > ++---
>> > >  drivers/vfio/vfio_spapr_eeh.c   | 46 
>> > > ++
>> > >  include/linux/vfio.h| 13 +++---
>> > >  include/uapi/linux/vfio.h   |  6 +
>> > >  4 files changed, 93 insertions(+), 22 deletions(-)
>> > 
>> > This interface is terrible.  A function named foo_enabled() should
>> > return a bool, yes or no, don't try to overload it to also return a
>> > version.
>> 
>> Sorry, that one's my fault.  I suggested that approach to Gavin
>> without really thinking it through.
>> 
>> 
>> > AFAICT, patch 2/2 breaks current users by changing the offset
>> > of the union in struct vfio_eeh_pe_err.
>> 
>> Yeah, this one's ugly.  We have to preserve the offset, but that means
>> putting the group in a very awkward place.  Especially since I'm not
>> sure if there even are any existing users of the single extant union
>> branch.
>> 
>> Sigh.
>> 
>> > Also, we generally pass group
>> > file descriptors rather than a group ID because we can prove the
>> > ownership of the group through the file descriptor and we don't need to
>> > worry about races with the group because we can hold a reference to it.
>
>Duh.  I finally realised the better, simpler, obvious solution.
>
>Rather than changing the parameter structure, we should move the
>ioctl()s so they're on the group fd instead of the container fd.
>
>Obviously we need to keep it on the container fd for backwards compat,
>but I think we should just error out if there is more than one group
>in the container there.
>
>We will need a new capability too, obviously.  VFIO_EEH_GROUPFD maybe?
>

Yeah, the patches should be marked as "RFC" actually as they're actually
prototypes. I agree with David that the EEH ioctl commands should be routed
through IOMMU group as I proposed long time ago. However, if we're going
to do it now, we have to maintain two sets of interfaces: one handled by
the container's ioctl() and another one handled by the IOMMU group's ioctl().
Would it be a problem?

Actually, the code change is made based on the fact: nobody is using
the union (struct vfio_eeh_pe_err) yet before the QEMU changes to do
error injection gets merged by David. So I think it's fine to introduce
another field in struct vfio_eeh_pe_op though there is gap?

Thanks,
Gavin

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/2] VFIO: Accept IOMMU group (PE) ID

2015-09-22 Thread Gavin Shan
On Mon, Sep 21, 2015 at 11:42:28AM +1000, David Gibson wrote:
>On Sat, Sep 19, 2015 at 04:22:47PM +1000, David Gibson wrote:
>> On Fri, Sep 18, 2015 at 09:47:32AM -0600, Alex Williamson wrote:
>> > On Fri, 2015-09-18 at 16:24 +1000, Gavin Shan wrote:
>> > > This allows to accept IOMMU group (PE) ID from the parameter from 
>> > > userland
>> > > when handling EEH operation so that the operation only affects the target
>> > > IOMMU group (PE). If the IOMMU group (PE) ID in the parameter from 
>> > > userland
>> > > is invalid, all IOMMU groups (PEs) attached to the specified container 
>> > > are
>> > > affected as before.
>> > > 
>> > > Gavin Shan (2):
>> > >   drivers/vfio: Support EEH API revision
>> > >   drivers/vfio: Support IOMMU group for EEH operations
>> > > 
>> > >  drivers/vfio/vfio_iommu_spapr_tce.c | 50 
>> > > ++---
>> > >  drivers/vfio/vfio_spapr_eeh.c   | 46 
>> > > ++
>> > >  include/linux/vfio.h| 13 +++---
>> > >  include/uapi/linux/vfio.h   |  6 +
>> > >  4 files changed, 93 insertions(+), 22 deletions(-)
>> > 
>> > This interface is terrible.  A function named foo_enabled() should
>> > return a bool, yes or no, don't try to overload it to also return a
>> > version.
>> 
>> Sorry, that one's my fault.  I suggested that approach to Gavin
>> without really thinking it through.
>> 
>> 
>> > AFAICT, patch 2/2 breaks current users by changing the offset
>> > of the union in struct vfio_eeh_pe_err.
>> 
>> Yeah, this one's ugly.  We have to preserve the offset, but that means
>> putting the group in a very awkward place.  Especially since I'm not
>> sure if there even are any existing users of the single extant union
>> branch.
>> 
>> Sigh.
>> 

Yeah, perhaps I should have put "RFC" on the subjects because those
patches are really premature and are just intended to bring more discussion
on it.

>> > Also, we generally pass group
>> > file descriptors rather than a group ID because we can prove the
>> > ownership of the group through the file descriptor and we don't need to
>> > worry about races with the group because we can hold a reference to it.
>
>Duh.  I finally realised the better, simpler, obvious solution.
>
>Rather than changing the parameter structure, we should move the
>ioctl()s so they're on the group fd instead of the container fd.
>
>Obviously we need to keep it on the container fd for backwards compat,
>but I think we should just error out if there is more than one group
>in the container there.
>
>We will need a new capability too, obviously.  VFIO_EEH_GROUPFD maybe?
>

Yes, I agree to route EEH ioctl commands to group fd since EEH ioctl
commands operate on granularity of PE (IOMMU group). However, it requires
to extend current code to support that. I'm not sure if it's good idea as
I explained to David through IRC. Waiting for Alex to judge:

- Adding a callback to "struct vfio_group": platform_ioctl();
- When attaching the group to platform, this function is initialized;
- The EEH ioctl commands are routed to platform_ioctl() in 
vfio_group_fops_unl_ioctl()

Thanks,
Gavin

>-- 
>David Gibson   | I'll have my music baroque, and my code
>david AT gibson.dropbear.id.au | minimalist, thank you.  NOT _the_ _other_
>   | _way_ _around_!
>http://www.ozlabs.org/~dgibson


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH kernel v11 33/34] vfio: powerpc/spapr: Register memory and define IOMMU v2

2015-06-04 Thread Gavin Shan
On Fri, May 29, 2015 at 06:44:57PM +1000, Alexey Kardashevskiy wrote:
>The existing implementation accounts the whole DMA window in
>the locked_vm counter. This is going to be worse with multiple
>containers and huge DMA windows. Also, real-time accounting would require
>additional tracking of accounted pages due to the page size difference -
>IOMMU uses 4K pages and system uses 4K or 64K pages.
>
>Another issue is that actual pages pinning/unpinning happens on every
>DMA map/unmap request. This does not affect the performance much now as
>we spend way too much time now on switching context between
>guest/userspace/host but this will start to matter when we add in-kernel
>DMA map/unmap acceleration.
>
>This introduces a new IOMMU type for SPAPR - VFIO_SPAPR_TCE_v2_IOMMU.
>New IOMMU deprecates VFIO_IOMMU_ENABLE/VFIO_IOMMU_DISABLE and introduces
>2 new ioctls to register/unregister DMA memory -
>VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY -
>which receive user space address and size of a memory region which
>needs to be pinned/unpinned and counted in locked_vm.
>New IOMMU splits physical pages pinning and TCE table update
>into 2 different operations. It requires:
>1) guest pages to be registered first
>2) consequent map/unmap requests to work only with pre-registered memory.
>For the default single window case this means that the entire guest
>(instead of 2GB) needs to be pinned before using VFIO.
>When a huge DMA window is added, no additional pinning will be
>required, otherwise it would be guest RAM + 2GB.
>
>The new memory registration ioctls are not supported by
>VFIO_SPAPR_TCE_IOMMU. Dynamic DMA window and in-kernel acceleration
>will require memory to be preregistered in order to work.
>
>The accounting is done per the user process.
>
>This advertises v2 SPAPR TCE IOMMU and restricts what the userspace
>can do with v1 or v2 IOMMUs.
>
>In order to support memory pre-registration, we need a way to track
>the use of every registered memory region and only allow unregistration
>if a region is not in use anymore. So we need a way to tell from what
>region the just cleared TCE was from.
>
>This adds a userspace view of the TCE table into iommu_table struct.
>It contains userspace address, one per TCE entry. The table is only
>allocated when the ownership over an IOMMU group is taken which means
>it is only used from outside of the powernv code (such as VFIO).
>
>Signed-off-by: Alexey Kardashevskiy 
>[aw: for the vfio related changes]
>Acked-by: Alex Williamson 
>---
>Changes:
>v11:
>* mm_iommu_put() does not return a code so this does not check it
>* moved "v2" in tce_container to pack the struct
>
>v10:
>* moved it_userspace allocation to vfio_iommu_spapr_tce as it VFIO
>specific thing
>* squashed "powerpc/iommu: Add userspace view of TCE table" into this as
>it is
>a part of IOMMU v2
>* s/tce_iommu_use_page_v2/tce_iommu_prereg_ua_to_hpa/
>* fixed some function names to have "tce_iommu_" in the beginning rather
>just "tce_"
>* as mm_iommu_mapped_inc() can now fail, check for the return code
>
>v9:
>* s/tce_get_hva_cached/tce_iommu_use_page_v2/
>
>v7:
>* now memory is registered per mm (i.e. process)
>* moved memory registration code to powerpc/mmu
>* merged "vfio: powerpc/spapr: Define v2 IOMMU" into this
>* limited new ioctls to v2 IOMMU
>* updated doc
>* unsupported ioctls return -ENOTTY instead of -EPERM
>
>v6:
>* tce_get_hva_cached() returns hva via a pointer
>
>v4:
>* updated docs
>* s/kzmalloc/vzalloc/
>* in tce_pin_pages()/tce_unpin_pages() removed @vaddr, @size and
>replaced offset with index
>* renamed vfio_iommu_type_register_memory to vfio_iommu_spapr_register_memory
>and removed duplicating vfio_iommu_spapr_register_memory
>---
> Documentation/vfio.txt  |  31 ++-
> arch/powerpc/include/asm/iommu.h|   6 +
> drivers/vfio/vfio_iommu_spapr_tce.c | 512 ++--
> include/uapi/linux/vfio.h   |  27 ++
> 4 files changed, 487 insertions(+), 89 deletions(-)
>
>diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
>index 96978ec..7dcf2b5 100644
>--- a/Documentation/vfio.txt
>+++ b/Documentation/vfio.txt
>@@ -289,10 +289,12 @@ PPC64 sPAPR implementation note
>
> This implementation has some specifics:
>
>-1) Only one IOMMU group per container is supported as an IOMMU group
>-represents the minimal entity which isolation can be guaranteed for and
>-groups are allocated statically, one per a Partitionable Endpoint (PE)
>+1) On older systems (POWER7 with P5IOC2/IODA1) only one IOMMU group per
>+container is supported as an IOMMU table is allocated at the boot time,
>+one table per a IOMMU group which is a Partitionable Endpoint (PE)
> (PE is often a PCI domain but not always).
>+Newer systems (POWER8 with IODA2) have improved hardware design which allows
>+to remove this limitation and have multiple IOMMU groups per a VFIO container.
>
> 2) The hardware supports so called DMA windows - the PCI address range
> within which 

Re: [PATCH kernel v11 05/34] powerpc/iommu: Always release iommu_table in iommu_free_table()

2015-06-04 Thread Gavin Shan
On Fri, May 29, 2015 at 06:44:29PM +1000, Alexey Kardashevskiy wrote:
>At the moment iommu_free_table() only releases memory if
>the table was initialized for the platform code use, i.e. it had
>it_map initialized (which purpose is to track DMA memory space use).
>
>With dynamic DMA windows, we will need to be able to release
>iommu_table even if it was used for VFIO in which case it_map is NULL
>so does the patch.
>
>Signed-off-by: Alexey Kardashevskiy 

Reviewed-by: Gavin Shan 

Thanks,
Gavin

>---
>Changes:
>v11:
>* fixed parameter checks
>---
> arch/powerpc/kernel/iommu.c | 8 +---
> 1 file changed, 5 insertions(+), 3 deletions(-)
>
>diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
>index 3d47eb3..73eb39a 100644
>--- a/arch/powerpc/kernel/iommu.c
>+++ b/arch/powerpc/kernel/iommu.c
>@@ -713,9 +713,11 @@ void iommu_free_table(struct iommu_table *tbl, const char 
>*node_name)
>   unsigned long bitmap_sz;
>   unsigned int order;
>
>-  if (!tbl || !tbl->it_map) {
>-  printk(KERN_ERR "%s: expected TCE map for %s\n", __func__,
>-  node_name);
>+  if (!tbl)
>+  return;
>+
>+  if (!tbl->it_map) {
>+  kfree(tbl);
>   return;
>   }
>
>-- 
>2.4.0.rc3.8.gfb3e7d5
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH kernel v11 16/34] powerpc/spapr: vfio: Replace iommu_table with iommu_table_group

2015-06-04 Thread Gavin Shan
On Fri, May 29, 2015 at 06:44:40PM +1000, Alexey Kardashevskiy wrote:
>Modern IBM POWERPC systems support multiple (currently two) TCE tables
>per IOMMU group (a.k.a. PE). This adds a iommu_table_group container
>for TCE tables. Right now just one table is supported.
>
>This defines iommu_table_group struct which stores pointers to
>iommu_group and iommu_table(s). This replaces iommu_table with
>iommu_table_group where iommu_table was used to identify a group:
>- iommu_register_group();
>- iommudata of generic iommu_group;
>
>This removes @data from iommu_table as it_table_group provides
>same access to pnv_ioda_pe.
>
>For IODA, instead of embedding iommu_table, the new iommu_table_group
>keeps pointers to those. The iommu_table structs are allocated
>dynamically.
>
>For P5IOC2, both iommu_table_group and iommu_table are embedded into
>PE struct. As there is no EEH and SRIOV support for P5IOC2,
>iommu_free_table() should not be called on iommu_table struct pointers
>so we can keep it embedded in pnv_phb::p5ioc2.
>
>For pSeries, this replaces multiple calls of kzalloc_node() with a new
>iommu_pseries_alloc_group() helper and stores the table group struct
>pointer into the pci_dn struct. For release, a iommu_table_free_group()
>helper is added.
>
>This moves iommu_table struct allocation from SR-IOV code to
>the generic DMA initialization code in pnv_pci_ioda_setup_dma_pe and
>pnv_pci_ioda2_setup_dma_pe as this is where DMA is actually initialized.
>This change is here because those lines had to be changed anyway.
>
>This should cause no behavioural change.
>
>Signed-off-by: Alexey Kardashevskiy 
>[aw: for the vfio related changes]
>Acked-by: Alex Williamson 

Reviewed-by: Gavin Shan 

>---
>Changes:
>v11:
>* iommu_table_group moved outside #ifdef CONFIG_IOMMU_API as iommu_table
>is dynamically allocated and it needs a pointer to PE and
>iommu_table_group is this pointer
>
>v10:
>* new to the series, separated from
>"powerpc/spapr: vfio: Switch from iommu_table to new iommu_table_group"
>* iommu_table is not embedded into iommu_table_group but allocated
>dynamically in most cases
>* iommu_table allocation is moved to a single place for IODA2's
>pnv_pci_ioda_setup_dma_pe where it belongs to
>* added list of groups into iommu_table; most of the code just looks at
>the first item to keep the patch simpler
>---
> arch/powerpc/include/asm/iommu.h|  19 ++---
> arch/powerpc/include/asm/pci-bridge.h   |   2 +-
> arch/powerpc/kernel/iommu.c |  17 ++---
> arch/powerpc/platforms/powernv/pci-ioda.c   |  55 +++---
> arch/powerpc/platforms/powernv/pci-p5ioc2.c |  18 +++--
> arch/powerpc/platforms/powernv/pci.h|   3 +-
> arch/powerpc/platforms/pseries/iommu.c  | 107 +++-
> drivers/vfio/vfio_iommu_spapr_tce.c |  23 +++---
> 8 files changed, 152 insertions(+), 92 deletions(-)
>
>diff --git a/arch/powerpc/include/asm/iommu.h 
>b/arch/powerpc/include/asm/iommu.h
>index e2a45c3..5a7267f 100644
>--- a/arch/powerpc/include/asm/iommu.h
>+++ b/arch/powerpc/include/asm/iommu.h
>@@ -91,14 +91,9 @@ struct iommu_table {
>   struct iommu_pool pools[IOMMU_NR_POOLS];
>   unsigned long *it_map;   /* A simple allocation bitmap for now */
>   unsigned long  it_page_shift;/* table iommu page size */
>-#ifdef CONFIG_IOMMU_API
>-  struct iommu_group *it_group;
>-#endif
>+  struct iommu_table_group *it_table_group;
>   struct iommu_table_ops *it_ops;
>   void (*set_bypass)(struct iommu_table *tbl, bool enable);
>-#ifdef CONFIG_PPC_POWERNV
>-  void   *data;
>-#endif
> };
>
> /* Pure 2^n version of get_order */
>@@ -129,14 +124,22 @@ extern void iommu_free_table(struct iommu_table *tbl, 
>const char *node_name);
>  */
> extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
>   int nid);
>+#define IOMMU_TABLE_GROUP_MAX_TABLES  1
>+
>+struct iommu_table_group {
>+  struct iommu_group *group;
>+  struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
>+};
>+

The number of TCE tables supported per group would be worth making
dynamic in the long run, but not for now. P7IOC has one table per
group while PHB3 has two tables per group.

Thanks,
Gavin

> #ifdef CONFIG_IOMMU_API
>-extern void iommu_register_group(struct iommu_table *tbl,
>+
>+extern void iommu_register_group(struct iommu_table_group *table_group,
>int pci_domain_number, unsigned long pe_num);
> extern int iommu_add_device(struct device *dev);
> extern void iommu_del_device(struct device *dev);
> extern int __init tce_iommu_bus_notifier_i

Re: [PATCH kernel v11 20/34] powerpc/powernv/ioda2: Move TCE kill register address to PE

2015-06-04 Thread Gavin Shan
On Fri, May 29, 2015 at 06:44:44PM +1000, Alexey Kardashevskiy wrote:
>At the moment the DMA setup code looks for the "ibm,opal-tce-kill"
>property which contains the TCE kill register address. Writing to
>this register invalidates TCE cache on IODA/IODA2 hub.
>
>This moves the register address from iommu_table to pnv_phb as this
>register belongs to PHB and invalidates TCE cache for all tables of
>all attached PEs.
>
>This moves the property reading/remapping code to a helper which is
>called when DMA is being configured for PE and which does DMA setup
>for both IODA1 and IODA2.
>
>This adds a new pnv_pci_ioda2_tce_invalidate_entire() helper which
>invalidates cache for the entire table. It should be called after
>every call to opal_pci_map_pe_dma_window(). It was not required before
>because there was just a single TCE table and 64bit DMA was handled via
>bypass window (which has no table so no cache was used) but this is going
>to change with Dynamic DMA windows (DDW).
>
>Signed-off-by: Alexey Kardashevskiy 

Reviewed-by: Gavin Shan 

Thanks,
Gavin

>---
>Changes:
>v11:
>* s/pnv_pci_ioda2_tvt_invalidate/pnv_pci_ioda2_tce_invalidate_entire/g
>(cannot think of better-and-shorter name)
>* moved tce_inval_reg_phys/tce_inval_reg to pnv_phb
>
>v10:
>* fixed error from checkpatch.pl
>* removed comment at "ibm,opal-tce-kill" parsing as irrelevant
>* s/addr/val/ in pnv_pci_ioda2_tvt_invalidate() as it was not a kernel address
>
>v9:
>* new in the series
>---
> arch/powerpc/platforms/powernv/pci-ioda.c | 66 ++-
> arch/powerpc/platforms/powernv/pci.h  |  7 +++-
> 2 files changed, 44 insertions(+), 29 deletions(-)
>
>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
>b/arch/powerpc/platforms/powernv/pci-ioda.c
>index 1d0bb5b..3fd8b18 100644
>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>@@ -1679,8 +1679,8 @@ static void pnv_pci_ioda1_tce_invalidate(struct 
>iommu_table *tbl,
>   struct pnv_ioda_pe *pe = container_of(tgl->table_group,
>   struct pnv_ioda_pe, table_group);
>   __be64 __iomem *invalidate = rm ?
>-  (__be64 __iomem *)pe->tce_inval_reg_phys :
>-  (__be64 __iomem *)tbl->it_index;
>+  (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
>+  pe->phb->ioda.tce_inval_reg;
>   unsigned long start, end, inc;
>   const unsigned shift = tbl->it_page_shift;
>
>@@ -1751,6 +1751,19 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
>   .get = pnv_tce_get,
> };
>
>+static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
>+{
>+  /* 01xb - invalidate TCEs that match the specified PE# */
>+  unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF);
>+  struct pnv_phb *phb = pe->phb;
>+
>+  if (!phb->ioda.tce_inval_reg)
>+  return;
>+
>+  mb(); /* Ensure above stores are visible */
>+  __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg);
>+}
>+
> static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
>   unsigned long index, unsigned long npages, bool rm)
> {
>@@ -1761,8 +1774,8 @@ static void pnv_pci_ioda2_tce_invalidate(struct 
>iommu_table *tbl,
>   struct pnv_ioda_pe, table_group);
>   unsigned long start, end, inc;
>   __be64 __iomem *invalidate = rm ?
>-  (__be64 __iomem *)pe->tce_inval_reg_phys :
>-  (__be64 __iomem *)tbl->it_index;
>+  (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
>+  pe->phb->ioda.tce_inval_reg;
>   const unsigned shift = tbl->it_page_shift;
>
>   /* We'll invalidate DMA address in PE scope */
>@@ -1820,7 +1833,6 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb 
>*phb,
> {
>
>   struct page *tce_mem = NULL;
>-  const __be64 *swinvp;
>   struct iommu_table *tbl;
>   unsigned int i;
>   int64_t rc;
>@@ -1877,20 +1889,11 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb 
>*phb,
> base << 28, IOMMU_PAGE_SHIFT_4K);
>
>   /* OPAL variant of P7IOC SW invalidated TCEs */
>-  swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
>-  if (swinvp) {
>-  /* We need a couple more fields -- an address and a data
>-   * to or.  Since the bus is only printed out on table free
>-   * errors, and on the first pass the data will be a relative
>-   * bus number, print that out inst

Re: [PATCH kernel v11 21/34] powerpc/powernv/ioda2: Add TCE invalidation for all attached groups

2015-06-04 Thread Gavin Shan
On Fri, May 29, 2015 at 06:44:45PM +1000, Alexey Kardashevskiy wrote:
>The iommu_table struct keeps a list of IOMMU groups it is used for.
>At the moment there is just a single group attached but further
>patches will add TCE table sharing. When sharing is enabled, the TCE cache
>in each PE needs to be invalidated, which is what this patch does.
>
>This does not change pnv_pci_ioda1_tce_invalidate() as there is no plan
>to enable TCE table sharing on PHBs older than IODA2.
>
>Signed-off-by: Alexey Kardashevskiy 

Reviewed-by: Gavin Shan 

>---
>Changes:
>v10:
>* new to the series
>---
> arch/powerpc/platforms/powernv/pci-ioda.c | 35 ---
> 1 file changed, 23 insertions(+), 12 deletions(-)
>
>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
>b/arch/powerpc/platforms/powernv/pci-ioda.c
>index 3fd8b18..94fccc8 100644
>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>@@ -24,6 +24,7 @@
> #include 
> #include 
> #include 
>+#include 
>
> #include 
> #include 
>@@ -1764,23 +1765,15 @@ static inline void 
>pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
>   __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg);
> }
>
>-static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
>-  unsigned long index, unsigned long npages, bool rm)
>+static void pnv_pci_ioda2_tce_do_invalidate(unsigned pe_number, bool rm,
>+  __be64 __iomem *invalidate, unsigned shift,
>+  unsigned long index, unsigned long npages)

A better function name would be pnv_pci_ioda2_do_tce_invalidate(), and
it seems we no longer need "bool rm" since "invalidate" has already been
assigned the virtual/real address by the caller.

Thanks,
Gavin

> {
>-  struct iommu_table_group_link *tgl = list_first_entry_or_null(
>-  &tbl->it_group_list, struct iommu_table_group_link,
>-  next);
>-  struct pnv_ioda_pe *pe = container_of(tgl->table_group,
>-  struct pnv_ioda_pe, table_group);
>   unsigned long start, end, inc;
>-  __be64 __iomem *invalidate = rm ?
>-  (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
>-  pe->phb->ioda.tce_inval_reg;
>-  const unsigned shift = tbl->it_page_shift;
>
>   /* We'll invalidate DMA address in PE scope */
>   start = 0x2ull << 60;
>-  start |= (pe->pe_number & 0xFF);
>+  start |= (pe_number & 0xFF);
>   end = start;
>
>   /* Figure out the start, end and step */
>@@ -1798,6 +1791,24 @@ static void pnv_pci_ioda2_tce_invalidate(struct 
>iommu_table *tbl,
>   }
> }
>
>+static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
>+  unsigned long index, unsigned long npages, bool rm)
>+{
>+  struct iommu_table_group_link *tgl;
>+
>+  list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
>+  struct pnv_ioda_pe *pe = container_of(tgl->table_group,
>+  struct pnv_ioda_pe, table_group);
>+  __be64 __iomem *invalidate = rm ?
>+  (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
>+  pe->phb->ioda.tce_inval_reg;
>+
>+  pnv_pci_ioda2_tce_do_invalidate(pe->pe_number, rm,
>+  invalidate, tbl->it_page_shift,
>+  index, npages);
>+  }
>+}
>+
> static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
>   long npages, unsigned long uaddr,
>   enum dma_data_direction direction,
>-- 
>2.4.0.rc3.8.gfb3e7d5
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 2/4] vfio: spapr: Fix build error

2014-08-05 Thread Gavin Shan
On Tue, Aug 05, 2014 at 09:12:50PM -0600, Alex Williamson wrote:
>On Wed, 2014-08-06 at 12:48 +1000, Alexey Kardashevskiy wrote:
>> From: Gavin Shan 
>> 
>> The VFIO related components could be built as dynamic modules.
>> Unfortunately, CONFIG_EEH can't be configured to "m". The patch
>> fixes the build errors when configuring VFIO related components
>> as dynamic modules as follows:
>> 
>>   CC [M]  drivers/vfio/vfio_iommu_spapr_tce.o
>> In file included from drivers/vfio/vfio.c:33:0:
>> include/linux/vfio.h:101:43: warning: ‘struct pci_dev’ declared \
>> inside parameter list [enabled by default]
>> :
>>   WRAP    arch/powerpc/boot/zImage.pseries
>>   WRAP    arch/powerpc/boot/zImage.maple
>>   WRAP    arch/powerpc/boot/zImage.pmac
>>   WRAP    arch/powerpc/boot/zImage.epapr
>>   MODPOST 1818 modules
>> ERROR: ".vfio_spapr_iommu_eeh_ioctl" [drivers/vfio/vfio_iommu_spapr_tce.ko]\
>> undefined!
>> ERROR: ".vfio_spapr_pci_eeh_open" [drivers/vfio/pci/vfio-pci.ko] undefined!
>> ERROR: ".vfio_spapr_pci_eeh_release" [drivers/vfio/pci/vfio-pci.ko] 
>> undefined!
>> 
>> Reported-by: Alexey Kardashevskiy 
>> Signed-off-by: Gavin Shan 
>> [removed include "pci.h" in vfio.c]
>> Signed-off-by: Alexey Kardashevskiy 
>> ---
>> Changes:
>> v2:
>> * removed #include  from vfio.c and tested

Thanks to Alexey for his help on this :)

>
>I also commented regarding the ifdef around all of vfio_spapr_eeh.c:
>
>Why not add a new CONFIG_VFIO_SPAPR_EEH option to handle this
>instead?
>
>Did you disagree?  The ifdef is pretty ugly.
>

I'll introduce CONFIG_VFIO_SPAPR_EEH.

Thanks,
Gavin

>> ---
>>  drivers/vfio/Makefile | 4 ++--
>>  drivers/vfio/vfio_spapr_eeh.c | 6 ++
>>  2 files changed, 8 insertions(+), 2 deletions(-)
>> 
>> diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
>> index 50e30bc..4891cca 100644
>> --- a/drivers/vfio/Makefile
>> +++ b/drivers/vfio/Makefile
>> @@ -1,5 +1,5 @@
>>  obj-$(CONFIG_VFIO) += vfio.o
>>  obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
>> -obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
>> -obj-$(CONFIG_EEH) += vfio_spapr_eeh.o
>> +obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o \
>> +  vfio_spapr_eeh.o
>>  obj-$(CONFIG_VFIO_PCI) += pci/
>> diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
>> index f834b4c..1a93e83 100644
>> --- a/drivers/vfio/vfio_spapr_eeh.c
>> +++ b/drivers/vfio/vfio_spapr_eeh.c
>> @@ -14,15 +14,19 @@
>>  #include 
>>  
>>  /* We might build address mapping here for "fast" path later */
>> +#ifdef CONFIG_EEH
>> +
>>  int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
>>  {
>>  return eeh_dev_open(pdev);
>>  }
>> +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
>>  
>>  void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
>>  {
>>  eeh_dev_release(pdev);
>>  }
>> +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release);
>>  
>>  long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
>>  unsigned int cmd, unsigned long arg)
>> @@ -85,3 +89,5 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
>>  
>>  return ret;
>>  }
>> +EXPORT_SYMBOL_GPL(vfio_spapr_iommu_eeh_ioctl);
>> +#endif /* CONFIG_EEH */
>
>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 2/4] vfio: spapr: Fix build error

2014-08-05 Thread Gavin Shan
On Wed, Aug 06, 2014 at 04:33:29PM +1000, Alexey Kardashevskiy wrote:
>On 08/06/2014 01:54 PM, Gavin Shan wrote:
>> On Tue, Aug 05, 2014 at 09:12:50PM -0600, Alex Williamson wrote:
>>> On Wed, 2014-08-06 at 12:48 +1000, Alexey Kardashevskiy wrote:
>>>> From: Gavin Shan 
>>>>
>>>> The VFIO related components could be built as dynamic modules.
>>>> Unfortunately, CONFIG_EEH can't be configured to "m". The patch
>>>> fixes the build errors when configuring VFIO related components
>>>> as dynamic modules as follows:
>>>>
>>>>   CC [M]  drivers/vfio/vfio_iommu_spapr_tce.o
>>>> In file included from drivers/vfio/vfio.c:33:0:
>>>> include/linux/vfio.h:101:43: warning: ‘struct pci_dev’ declared \
>>>> inside parameter list [enabled by default]
>>>> :
>>>>   WRAP    arch/powerpc/boot/zImage.pseries
>>>>   WRAP    arch/powerpc/boot/zImage.maple
>>>>   WRAP    arch/powerpc/boot/zImage.pmac
>>>>   WRAP    arch/powerpc/boot/zImage.epapr
>>>>   MODPOST 1818 modules
>>>> ERROR: ".vfio_spapr_iommu_eeh_ioctl" 
>>>> [drivers/vfio/vfio_iommu_spapr_tce.ko]\
>>>> undefined!
>>>> ERROR: ".vfio_spapr_pci_eeh_open" [drivers/vfio/pci/vfio-pci.ko] undefined!
>>>> ERROR: ".vfio_spapr_pci_eeh_release" [drivers/vfio/pci/vfio-pci.ko] 
>>>> undefined!
>>>>
>>>> Reported-by: Alexey Kardashevskiy 
>>>> Signed-off-by: Gavin Shan 
>>>> [removed include "pci.h" in vfio.c]
>>>> Signed-off-by: Alexey Kardashevskiy 
>>>> ---
>>>> Changes:
>>>> v2:
>>>> * removed #include  from vfio.c and tested
>> 
>> Appreciated for Alexey's help on it :)
>
>
>My bad, that was wrong actually, we still need this:
>

Yep, no worries. I'll fix it in next revision.

>diff --git a/include/linux/vfio.h b/include/linux/vfio.h
>index 25a0fbd..224128a 100644
>--- a/include/linux/vfio.h
>+++ b/include/linux/vfio.h
>@@ -98,6 +98,7 @@ extern int vfio_external_user_iommu_id(struct vfio_group
>*group);
> extern long vfio_external_check_extension(struct vfio_group *group,
>  unsigned long arg);
>
>+struct pci_dev;
> #ifdef CONFIG_EEH
> extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
> extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
>
>
>
>Otherwise it is "warning: ‘struct pci_dev’ declared inside parameter list".
>For some reason I only see this warning when backporting this patch to 3.10
>and I do not see it in 3.16-rc7, I guess pci.h gets included somewhere.
>
>
>> 
>>>
>>> I also commented regarding the ifdef around all of vfio_spapr_eeh.c:
>>>
>>>Why not add a new CONFIG_VFIO_SPAPR_EEH option to handle this
>>>instead?
>>>
>>> Did you disagree?  The ifdef is pretty ugly.
>>>
>> 
>> I'll introduce CONFIG_VFIO_SPAPR_EEH.
>
>
>So, Gavin, then the patchset is yours (again), ok?
>

Sure. Thanks for your help, Alexey. I was busy with other
bugs with higher priority recently. I'm working on this.

Thanks,
Gavin

>
>
>
>> Thanks,
>> Gavin
>> 
>>>> ---
>>>>  drivers/vfio/Makefile | 4 ++--
>>>>  drivers/vfio/vfio_spapr_eeh.c | 6 ++
>>>>  2 files changed, 8 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
>>>> index 50e30bc..4891cca 100644
>>>> --- a/drivers/vfio/Makefile
>>>> +++ b/drivers/vfio/Makefile
>>>> @@ -1,5 +1,5 @@
>>>>  obj-$(CONFIG_VFIO) += vfio.o
>>>>  obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
>>>> -obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
>>>> -obj-$(CONFIG_EEH) += vfio_spapr_eeh.o
>>>> +obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o \
>>>> +vfio_spapr_eeh.o
>>>>  obj-$(CONFIG_VFIO_PCI) += pci/
>>>> diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
>>>> index f834b4c..1a93e83 100644
>>>> --- a/drivers/vfio/vfio_spapr_eeh.c
>>>> +++ b/drivers/vfio/vfio_spapr_eeh.c
>>>> @@ -14,15 +14,19 @@
>>>>  #include 
>>>>  
>>>>  /* We might build address mapping here for "fast" path later */
>>>> +#ifdef CONFIG_EEH
>>>> +
>>>>  int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
>>>>  {
>>>>return eeh_dev_open(pdev);
>>>>  }
>>>> +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
>>>>  
>>>>  void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
>>>>  {
>>>>eeh_dev_release(pdev);
>>>>  }
>>>> +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release);
>>>>  
>>>>  long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
>>>>unsigned int cmd, unsigned long arg)
>>>> @@ -85,3 +89,5 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group 
>>>> *group,
>>>>  
>>>>return ret;
>>>>  }
>>>> +EXPORT_SYMBOL_GPL(vfio_spapr_iommu_eeh_ioctl);
>>>> +#endif /* CONFIG_EEH */
>>>
>>>
>>>
>> 
>
>
>-- 
>Alexey
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 3/4] drivers/vfio: Allow EEH to be built as module

2014-08-06 Thread Gavin Shan
From: Alexey Kardashevskiy 

This adds the necessary declarations to the SPAPR VFIO EEH module;
otherwise multiple dynamic linker errors are reported:

vfio_spapr_eeh: Unknown symbol eeh_pe_set_option (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_configure (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_reset (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_get_state (err 0)
vfio_spapr_eeh: Unknown symbol eeh_iommu_group_to_pe (err 0)
vfio_spapr_eeh: Unknown symbol eeh_dev_open (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_set_option (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_configure (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_reset (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_get_state (err 0)
vfio_spapr_eeh: Unknown symbol eeh_iommu_group_to_pe (err 0)
vfio_spapr_eeh: Unknown symbol eeh_dev_open (err 0)

Signed-off-by: Alexey Kardashevskiy 
Signed-off-by: Gavin Shan 
---
 drivers/vfio/vfio_spapr_eeh.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 949f98e..4779cac 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -9,10 +9,15 @@
  * published by the Free Software Foundation.
  */
 
+#include 
 #include 
 #include 
 #include 
 
+#define DRIVER_VERSION "0.1"
+#define DRIVER_AUTHOR  "Gavin Shan, IBM Corporation"
+#define DRIVER_DESC"VFIO IOMMU SPAPR EEH"
+
 /* We might build address mapping here for "fast" path later */
 int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
 {
@@ -88,3 +93,8 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
return ret;
 }
 EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl);
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 0/4] drivers/vfio: EEH Compile and compatibility

2014-08-06 Thread Gavin Shan
The patchset is mainly for fixing errors from building VFIO components
as dynamic modules. PATCH[4/4] allows VFIO to be used even though EEH
fails to initialize for VFIO PCI devices.

Alexey Kardashevskiy (2):
  drivers/vfio: Allow EEH to be built as module
  drivers/vfio: Enable VFIO if EEH is not supported

Gavin Shan (2):
  powerpc/eeh: Export eeh_iommu_group_to_pe()
  drivers/vfio: Fix EEH build error

 arch/powerpc/kernel/eeh.c |  1 +
 drivers/vfio/Kconfig  |  6 ++
 drivers/vfio/Makefile |  2 +-
 drivers/vfio/pci/vfio_pci.c   |  6 +-
 drivers/vfio/vfio_spapr_eeh.c | 17 +++--
 include/linux/vfio.h  |  6 +++---
 6 files changed, 27 insertions(+), 11 deletions(-)

-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 4/4] drivers/vfio: Enable VFIO if EEH is not supported

2014-08-06 Thread Gavin Shan
From: Alexey Kardashevskiy 

The existing vfio_pci_open() fails upon error returned from
vfio_spapr_pci_eeh_open(), which breaks POWER7's P5IOC2 PHB
support which this patch brings back.

The patch fixes the issue by dropping the return value of
vfio_spapr_pci_eeh_open().

Signed-off-by: Alexey Kardashevskiy 
Signed-off-by: Gavin Shan 
---
v3: Drop return value of vfio_spapr_pci_eeh_open()
---
 drivers/vfio/pci/vfio_pci.c   | 6 +-
 drivers/vfio/vfio_spapr_eeh.c | 4 ++--
 include/linux/vfio.h  | 5 ++---
 3 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index e2ee80f..32d69c8 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -178,11 +178,7 @@ static int vfio_pci_open(void *device_data)
if (ret)
goto error;
 
-   ret = vfio_spapr_pci_eeh_open(vdev->pdev);
-   if (ret) {
-   vfio_pci_disable(vdev);
-   goto error;
-   }
+   vfio_spapr_pci_eeh_open(vdev->pdev);
}
 
return 0;
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 4779cac..86dfceb 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -19,9 +19,9 @@
 #define DRIVER_DESC"VFIO IOMMU SPAPR EEH"
 
 /* We might build address mapping here for "fast" path later */
-int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
 {
-   return eeh_dev_open(pdev);
+   eeh_dev_open(pdev);
 }
 EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
 
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 224128a..d320411 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -100,15 +100,14 @@ extern long vfio_external_check_extension(struct 
vfio_group *group,
 
 struct pci_dev;
 #ifdef CONFIG_EEH
-extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
+extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
 extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
 extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
   unsigned int cmd,
   unsigned long arg);
 #else
-static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
 {
-   return 0;
 }
 
 static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 2/4] drivers/vfio: Fix EEH build error

2014-08-06 Thread Gavin Shan
The VFIO related components could be built as dynamic modules.
Unfortunately, CONFIG_EEH can't be configured to "m". The patch
fixes the build errors when configuring VFIO related components
as dynamic modules as follows:

  CC [M]  drivers/vfio/vfio_iommu_spapr_tce.o
In file included from drivers/vfio/vfio.c:33:0:
include/linux/vfio.h:101:43: warning: ‘struct pci_dev’ declared \
inside parameter list [enabled by default]
   :
  WRAP    arch/powerpc/boot/zImage.pseries
  WRAP    arch/powerpc/boot/zImage.maple
  WRAP    arch/powerpc/boot/zImage.pmac
  WRAP    arch/powerpc/boot/zImage.epapr
  MODPOST 1818 modules
ERROR: ".vfio_spapr_iommu_eeh_ioctl" [drivers/vfio/vfio_iommu_spapr_tce.ko]\
undefined!
ERROR: ".vfio_spapr_pci_eeh_open" [drivers/vfio/pci/vfio-pci.ko] undefined!
ERROR: ".vfio_spapr_pci_eeh_release" [drivers/vfio/pci/vfio-pci.ko] undefined!

Reported-by: Alexey Kardashevskiy 
Signed-off-by: Gavin Shan 
Signed-off-by: Alexey Kardashevskiy 
---
v3: Introduce CONFIG_VFIO_SPAPR_EEH and add "struct pci_dev" in vfio.h
v2: remove #include  from vfio.c
---
 drivers/vfio/Kconfig  | 6 ++
 drivers/vfio/Makefile | 2 +-
 drivers/vfio/vfio_spapr_eeh.c | 3 +++
 include/linux/vfio.h  | 1 +
 4 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index af7b204..d8c5763 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -8,11 +8,17 @@ config VFIO_IOMMU_SPAPR_TCE
depends on VFIO && SPAPR_TCE_IOMMU
default n
 
+config VFIO_SPAPR_EEH
+   tristate
+   depends on EEH && VFIO_IOMMU_SPAPR_TCE
+   default n
+
 menuconfig VFIO
tristate "VFIO Non-Privileged userspace driver framework"
depends on IOMMU_API
select VFIO_IOMMU_TYPE1 if X86
select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES)
+   select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES)
select ANON_INODES
help
  VFIO provides a framework for secure userspace device drivers.
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 50e30bc..0b035b1 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_VFIO) += vfio.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
 obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
-obj-$(CONFIG_EEH) += vfio_spapr_eeh.o
+obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
 obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index f834b4c..949f98e 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -18,11 +18,13 @@ int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
 {
return eeh_dev_open(pdev);
 }
+EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
 
 void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
 {
eeh_dev_release(pdev);
 }
+EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release);
 
 long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
unsigned int cmd, unsigned long arg)
@@ -85,3 +87,4 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 
return ret;
 }
+EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 25a0fbd..224128a 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -98,6 +98,7 @@ extern int vfio_external_user_iommu_id(struct vfio_group 
*group);
 extern long vfio_external_check_extension(struct vfio_group *group,
  unsigned long arg);
 
+struct pci_dev;
 #ifdef CONFIG_EEH
 extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
 extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 1/4] powerpc/eeh: Export eeh_iommu_group_to_pe()

2014-08-06 Thread Gavin Shan
The function is used by the VFIO driver, which might be built as a
dynamic module, so it should be exported.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/kernel/eeh.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 6043879..59a64f8 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1254,6 +1254,7 @@ struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group 
*group)
 
return edev->pe;
 }
+EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);
 
 #endif /* CONFIG_IOMMU_API */
 
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 4/4] drivers/vfio: Enable VFIO if EEH is not supported

2014-08-06 Thread Gavin Shan
On Wed, Aug 06, 2014 at 11:05:43PM +1000, Alexey Kardashevskiy wrote:
>On 08/06/2014 10:50 PM, Alex Williamson wrote:
>> On Wed, 2014-08-06 at 19:49 +1000, Gavin Shan wrote:
>>> From: Alexey Kardashevskiy 
>>>
>>> The existing vfio_pci_open() fails upon error returned from
>>> vfio_spapr_pci_eeh_open(), which breaks POWER7's P5IOC2 PHB
>>> support which this patch brings back.
>>>
>>> The patch fixes the issue by dropping the return value of
>>> vfio_spapr_pci_eeh_open().
>>>
>>> Signed-off-by: Alexey Kardashevskiy 
>>> Signed-off-by: Gavin Shan 
>>> ---
>>> v3: Drop return value of vfio_spapr_pci_eeh_open()
>>> ---
>>>  drivers/vfio/pci/vfio_pci.c   | 6 +-
>>>  drivers/vfio/vfio_spapr_eeh.c | 4 ++--
>>>  include/linux/vfio.h  | 5 ++---
>>>  3 files changed, 5 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
>>> index e2ee80f..32d69c8 100644
>>> --- a/drivers/vfio/pci/vfio_pci.c
>>> +++ b/drivers/vfio/pci/vfio_pci.c
>>> @@ -178,11 +178,7 @@ static int vfio_pci_open(void *device_data)
>>> if (ret)
>>> goto error;
>>>  
>>> -   ret = vfio_spapr_pci_eeh_open(vdev->pdev);
>>> -   if (ret) {
>>> -   vfio_pci_disable(vdev);
>>> -   goto error;
>>> -   }
>>> +   vfio_spapr_pci_eeh_open(vdev->pdev);
>>> }
>>>  
>>> return 0;
>>> diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
>>> index 4779cac..86dfceb 100644
>>> --- a/drivers/vfio/vfio_spapr_eeh.c
>>> +++ b/drivers/vfio/vfio_spapr_eeh.c
>>> @@ -19,9 +19,9 @@
>>>  #define DRIVER_DESC"VFIO IOMMU SPAPR EEH"
>>>  
>>>  /* We might build address mapping here for "fast" path later */
>>> -int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
>>> +void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
>>>  {
>>> -   return eeh_dev_open(pdev);
>>> +   eeh_dev_open(pdev);
>> 
>> Wasn't there some intent to provide a warning message, that would now be
>> done here?  Has that idea been dropped?
>
>
>Comrade Gavin just forgot it :)
>
>Gavin, please add it. Thanks!
>

Sure, I'll add the following warning message to eeh_dev_open() in a
separate patch in v4, if nobody objects.

int eeh_dev_open(struct pci_dev *pdev)
{
:
/* No EEH device or PE ? */
edev = pci_dev_to_eeh_dev(pdev);
if (!edev || !edev->pe) {
pr_warn_once("%s: Device %s not supported\n",
 __func__, pci_name(pdev));
goto out;
}
:
}

Thanks,
Gavin

>
>
>> 
>>>  }
>>>  EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
>>>  
>>> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
>>> index 224128a..d320411 100644
>>> --- a/include/linux/vfio.h
>>> +++ b/include/linux/vfio.h
>>> @@ -100,15 +100,14 @@ extern long vfio_external_check_extension(struct 
>>> vfio_group *group,
>>>  
>>>  struct pci_dev;
>>>  #ifdef CONFIG_EEH
>>> -extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
>>> +extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
>>>  extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
>>>  extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
>>>unsigned int cmd,
>>>unsigned long arg);
>>>  #else
>>> -static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
>>> +static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
>>>  {
>>> -   return 0;
>>>  }
>>>  
>>>  static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
>> 
>> 
>> 
>
>
>-- 
>Alexey
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 4/4] drivers/vfio: Enable VFIO if EEH is not supported

2014-08-06 Thread Gavin Shan
On Thu, Aug 07, 2014 at 12:10:07PM +1000, Gavin Shan wrote:
>On Wed, Aug 06, 2014 at 11:05:43PM +1000, Alexey Kardashevskiy wrote:
>>On 08/06/2014 10:50 PM, Alex Williamson wrote:
>>> On Wed, 2014-08-06 at 19:49 +1000, Gavin Shan wrote:
>>>> From: Alexey Kardashevskiy 
>>>>
>>>> The existing vfio_pci_open() fails upon error returned from
>>>> vfio_spapr_pci_eeh_open(), which breaks POWER7's P5IOC2 PHB
>>>> support which this patch brings back.
>>>>
>>>> The patch fixes the issue by dropping the return value of
>>>> vfio_spapr_pci_eeh_open().
>>>>
>>>> Signed-off-by: Alexey Kardashevskiy 
>>>> Signed-off-by: Gavin Shan 
>>>> ---
>>>> v3: Drop return value of vfio_spapr_pci_eeh_open()
>>>> ---
>>>>  drivers/vfio/pci/vfio_pci.c   | 6 +-
>>>>  drivers/vfio/vfio_spapr_eeh.c | 4 ++--
>>>>  include/linux/vfio.h  | 5 ++---
>>>>  3 files changed, 5 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
>>>> index e2ee80f..32d69c8 100644
>>>> --- a/drivers/vfio/pci/vfio_pci.c
>>>> +++ b/drivers/vfio/pci/vfio_pci.c
>>>> @@ -178,11 +178,7 @@ static int vfio_pci_open(void *device_data)
>>>>if (ret)
>>>>goto error;
>>>>  
>>>> -  ret = vfio_spapr_pci_eeh_open(vdev->pdev);
>>>> -  if (ret) {
>>>> -  vfio_pci_disable(vdev);
>>>> -  goto error;
>>>> -  }
>>>> +  vfio_spapr_pci_eeh_open(vdev->pdev);
>>>>}
>>>>  
>>>>return 0;
>>>> diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
>>>> index 4779cac..86dfceb 100644
>>>> --- a/drivers/vfio/vfio_spapr_eeh.c
>>>> +++ b/drivers/vfio/vfio_spapr_eeh.c
>>>> @@ -19,9 +19,9 @@
>>>>  #define DRIVER_DESC   "VFIO IOMMU SPAPR EEH"
>>>>  
>>>>  /* We might build address mapping here for "fast" path later */
>>>> -int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
>>>> +void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
>>>>  {
>>>> -  return eeh_dev_open(pdev);
>>>> +  eeh_dev_open(pdev);
>>> 
>>> Wasn't there some intent to provide a warning message, that would now be
>>> done here?  Has that idea been dropped?
>>
>>
>>Comrade Gavin just forgot it :)
>>
>>Gavin, please add it. Thanks!
>>
>
>Sure, I'll add following warning message in eeh_dev_open() in
>separate patch in v4 in case nobody objects.
>
>int eeh_dev_open(struct pci_dev *pdev)
>{
>:
>/* No EEH device or PE ? */
>edev = pci_dev_to_eeh_dev(pdev);
>if (!edev || !edev->pe) {
>   pr_warn_once("%s: Device %s not supported\n",
>__func__, pci_name(pdev));
>goto out;
>}
>:
>}
>

Well, I added the warning message in eeh_dev_open() and am sending "v4" out.

Thanks,
Gavin

>
>>
>>
>>> 
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
>>>>  
>>>> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
>>>> index 224128a..d320411 100644
>>>> --- a/include/linux/vfio.h
>>>> +++ b/include/linux/vfio.h
>>>> @@ -100,15 +100,14 @@ extern long vfio_external_check_extension(struct 
>>>> vfio_group *group,
>>>>  
>>>>  struct pci_dev;
>>>>  #ifdef CONFIG_EEH
>>>> -extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
>>>> +extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
>>>>  extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
>>>>  extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
>>>>   unsigned int cmd,
>>>>   unsigned long arg);
>>>>  #else
>>>> -static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
>>>> +static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
>>>>  {
>>>> -  return 0;
>>>>  }
>>>>  
>>>>  static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
>>> 
>>> 
>>> 
>>
>>
>>-- 
>>Alexey
>>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 1/5] powerpc/eeh: Export eeh_iommu_group_to_pe()

2014-08-06 Thread Gavin Shan
The function is used by VFIO driver, which might be built as a
dynamic module. So it should be exported.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/kernel/eeh.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 6043879..59a64f8 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1254,6 +1254,7 @@ struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group 
*group)
 
return edev->pe;
 }
+EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);
 
 #endif /* CONFIG_IOMMU_API */
 
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 2/5] powerpc/eeh: Add warning message in eeh_dev_open()

2014-08-06 Thread Gavin Shan
The patch adds one warning message in eeh_dev_open() in case the
PCI device can't be marked as passed through.

Suggested-by: Alexey Kardashevskiy 
Signed-off-by: Gavin Shan 
---
 arch/powerpc/kernel/eeh.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 59a64f8..5d73a49 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1162,8 +1162,11 @@ int eeh_dev_open(struct pci_dev *pdev)
 
/* No EEH device or PE ? */
edev = pci_dev_to_eeh_dev(pdev);
-   if (!edev || !edev->pe)
+   if (!edev || !edev->pe) {
+   pr_warn_once("%s: PCI device %s not supported\n",
+__func__, pci_name(pdev));
goto out;
+   }
 
/* Increase PE's pass through count */
atomic_inc(&edev->pe->pass_dev_cnt);
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 3/5] drivers/vfio: Fix EEH build error

2014-08-06 Thread Gavin Shan
The VFIO related components could be built as dynamic modules.
Unfortunately, CONFIG_EEH can't be configured to "m". The patch
fixes the build errors when configuring VFIO related components
as dynamic modules as follows:

  CC [M]  drivers/vfio/vfio_iommu_spapr_tce.o
In file included from drivers/vfio/vfio.c:33:0:
include/linux/vfio.h:101:43: warning: ‘struct pci_dev’ declared \
inside parameter list [enabled by default]
   :
  WRAP    arch/powerpc/boot/zImage.pseries
  WRAP    arch/powerpc/boot/zImage.maple
  WRAP    arch/powerpc/boot/zImage.pmac
  WRAP    arch/powerpc/boot/zImage.epapr
  MODPOST 1818 modules
ERROR: ".vfio_spapr_iommu_eeh_ioctl" [drivers/vfio/vfio_iommu_spapr_tce.ko]\
undefined!
ERROR: ".vfio_spapr_pci_eeh_open" [drivers/vfio/pci/vfio-pci.ko] undefined!
ERROR: ".vfio_spapr_pci_eeh_release" [drivers/vfio/pci/vfio-pci.ko] undefined!

Reported-by: Alexey Kardashevskiy 
Signed-off-by: Gavin Shan 
Signed-off-by: Alexey Kardashevskiy 
---
v3: Introduce CONFIG_VFIO_SPAPR_EEH and add "struct pci_dev" in vfio.h
v2: remove #include  from vfio.c
---
 drivers/vfio/Kconfig  | 6 ++
 drivers/vfio/Makefile | 2 +-
 drivers/vfio/vfio_spapr_eeh.c | 3 +++
 include/linux/vfio.h  | 1 +
 4 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index af7b204..d8c5763 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -8,11 +8,17 @@ config VFIO_IOMMU_SPAPR_TCE
depends on VFIO && SPAPR_TCE_IOMMU
default n
 
+config VFIO_SPAPR_EEH
+   tristate
+   depends on EEH && VFIO_IOMMU_SPAPR_TCE
+   default n
+
 menuconfig VFIO
tristate "VFIO Non-Privileged userspace driver framework"
depends on IOMMU_API
select VFIO_IOMMU_TYPE1 if X86
select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES)
+   select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES)
select ANON_INODES
help
  VFIO provides a framework for secure userspace device drivers.
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 50e30bc..0b035b1 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_VFIO) += vfio.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
 obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
-obj-$(CONFIG_EEH) += vfio_spapr_eeh.o
+obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
 obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index f834b4c..949f98e 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -18,11 +18,13 @@ int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
 {
return eeh_dev_open(pdev);
 }
+EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
 
 void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
 {
eeh_dev_release(pdev);
 }
+EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release);
 
 long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
unsigned int cmd, unsigned long arg)
@@ -85,3 +87,4 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 
return ret;
 }
+EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 25a0fbd..224128a 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -98,6 +98,7 @@ extern int vfio_external_user_iommu_id(struct vfio_group 
*group);
 extern long vfio_external_check_extension(struct vfio_group *group,
  unsigned long arg);
 
+struct pci_dev;
 #ifdef CONFIG_EEH
 extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
 extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 5/5] drivers/vfio: Enable VFIO if EEH is not supported

2014-08-06 Thread Gavin Shan
From: Alexey Kardashevskiy 

The existing vfio_pci_open() fails upon error returned from
vfio_spapr_pci_eeh_open(), which breaks POWER7's P5IOC2 PHB
support which this patch brings back.

The patch fixes the issue by dropping the return value of
vfio_spapr_pci_eeh_open().

Signed-off-by: Alexey Kardashevskiy 
Signed-off-by: Gavin Shan 
---
v3: Drop return value of vfio_spapr_pci_eeh_open()
v4: Add warning message in eeh_dev_open() in PATCH[2/5]
---
 drivers/vfio/pci/vfio_pci.c   | 6 +-
 drivers/vfio/vfio_spapr_eeh.c | 4 ++--
 include/linux/vfio.h  | 5 ++---
 3 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index e2ee80f..32d69c8 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -178,11 +178,7 @@ static int vfio_pci_open(void *device_data)
if (ret)
goto error;
 
-   ret = vfio_spapr_pci_eeh_open(vdev->pdev);
-   if (ret) {
-   vfio_pci_disable(vdev);
-   goto error;
-   }
+   vfio_spapr_pci_eeh_open(vdev->pdev);
}
 
return 0;
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 4779cac..86dfceb 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -19,9 +19,9 @@
 #define DRIVER_DESC    "VFIO IOMMU SPAPR EEH"
 
 /* We might build address mapping here for "fast" path later */
-int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
 {
-   return eeh_dev_open(pdev);
+   eeh_dev_open(pdev);
 }
 EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
 
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 224128a..d320411 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -100,15 +100,14 @@ extern long vfio_external_check_extension(struct 
vfio_group *group,
 
 struct pci_dev;
 #ifdef CONFIG_EEH
-extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
+extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
 extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
 extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
   unsigned int cmd,
   unsigned long arg);
 #else
-static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
 {
-   return 0;
 }
 
 static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 0/5] drivers/vfio: EEH Compile and compatibility

2014-08-06 Thread Gavin Shan
The patchset is mainly for fixing errors from building VFIO components
as dynamic modules. PATCH[5/5] allows VFIO to be used even though EEH
fails to initialize for VFIO PCI devices.


Alexey Kardashevskiy (2):
  drivers/vfio: Allow EEH to be built as module
  drivers/vfio: Enable VFIO if EEH is not supported

Gavin Shan (3):
  powerpc/eeh: Export eeh_iommu_group_to_pe()
  powerpc/eeh: Add warning message in eeh_dev_open()
  drivers/vfio: Fix EEH build error

 arch/powerpc/kernel/eeh.c |  6 +-
 drivers/vfio/Kconfig  |  6 ++
 drivers/vfio/Makefile |  2 +-
 drivers/vfio/pci/vfio_pci.c   |  6 +-
 drivers/vfio/vfio_spapr_eeh.c | 17 +++--
 include/linux/vfio.h  |  6 +++---
 6 files changed, 31 insertions(+), 12 deletions(-)

-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 4/5] drivers/vfio: Allow EEH to be built as module

2014-08-06 Thread Gavin Shan
From: Alexey Kardashevskiy 

This adds the necessary declarations to the SPAPR VFIO EEH module;
otherwise multiple dynamic linker errors are reported:

vfio_spapr_eeh: Unknown symbol eeh_pe_set_option (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_configure (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_reset (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_get_state (err 0)
vfio_spapr_eeh: Unknown symbol eeh_iommu_group_to_pe (err 0)
vfio_spapr_eeh: Unknown symbol eeh_dev_open (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_set_option (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_configure (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_reset (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_get_state (err 0)
vfio_spapr_eeh: Unknown symbol eeh_iommu_group_to_pe (err 0)
vfio_spapr_eeh: Unknown symbol eeh_dev_open (err 0)

Signed-off-by: Alexey Kardashevskiy 
Signed-off-by: Gavin Shan 
---
 drivers/vfio/vfio_spapr_eeh.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 949f98e..4779cac 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -9,10 +9,15 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/module.h>
 #include 
 #include 
 #include 
 
+#define DRIVER_VERSION "0.1"
+#define DRIVER_AUTHOR  "Gavin Shan, IBM Corporation"
+#define DRIVER_DESC    "VFIO IOMMU SPAPR EEH"
+
 /* We might build address mapping here for "fast" path later */
 int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
 {
@@ -88,3 +93,8 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
return ret;
 }
 EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl);
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] drivers/vfio: Support EEH error injection

2015-03-10 Thread Gavin Shan
The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
to inject the specified EEH error, which is described by
struct vfio_eeh_pe_err, into the indicated PE for testing purposes.

Signed-off-by: Gavin Shan 
---
 Documentation/vfio.txt| 47 ++-
 drivers/vfio/vfio_spapr_eeh.c | 14 +
 include/uapi/linux/vfio.h | 34 ++-
 3 files changed, 80 insertions(+), 15 deletions(-)

diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index 96978ec..2e7f736 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -328,7 +328,13 @@ So 4 additional ioctls have been added:
 
 The code flow from the example above should be slightly changed:
 
-   struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
+   struct vfio_eeh_pe_op *pe_op;
+   struct vfio_eeh_pe_err *pe_err;
+
+   pe_op = malloc(sizeof(*pe_op) + sizeof(*pe_err));
+   pe_err = (void *)pe_op + sizeof(*pe_op);
+   pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
+   pe_op->flags = 0;
 
.
/* Add the group to the container */
@@ -367,8 +373,8 @@ The code flow from the example above should be slightly 
changed:
ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH);
 
/* Enable the EEH functionality on the device */
-   pe_op.op = VFIO_EEH_PE_ENABLE;
-   ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+   pe_op->op = VFIO_EEH_PE_ENABLE;
+   ioctl(container, VFIO_EEH_PE_OP, pe_op);
 
/* You're suggested to create additional data struct to represent
 * PE, and put child devices belonging to same IOMMU group to the
@@ -376,8 +382,8 @@ The code flow from the example above should be slightly 
changed:
 */
 
/* Check the PE's state and make sure it's in functional state */
-   pe_op.op = VFIO_EEH_PE_GET_STATE;
-   ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+   pe_op->op = VFIO_EEH_PE_GET_STATE;
+   ioctl(container, VFIO_EEH_PE_OP, pe_op);
 
/* Save device state using pci_save_state().
 * EEH should be enabled on the specified device.
@@ -385,11 +391,24 @@ The code flow from the example above should be slightly 
changed:
 

 
+   /* Inject EEH error, which is expected to be caused by 32-bits
+* config load.
+*/
+   pe_err->type = VFIO_EEH_ERR_TYPE_32;
+   pe_err->func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR;
+   pe_err->addr = 0ul;
+   pe_err->mask = 0ul;
+   pe_op->op = VFIO_EEH_PE_INJECT_ERR;
+   ioctl(container, VFIO_EEH_PE_OP, pe_op);
+
+   
+
/* When 0xFF's returned from reading PCI config space or IO BARs
 * of the PCI device. Check the PE's state to see if that has been
 * frozen.
 */
-   ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+   pe_op->op = VFIO_EEH_PE_GET_STATE;
+   ioctl(container, VFIO_EEH_PE_OP, pe_op);
 
/* Waiting for pending PCI transactions to be completed and don't
 * produce any more PCI traffic from/to the affected PE until
@@ -400,22 +419,22 @@ The code flow from the example above should be slightly 
changed:
 * standard part of PCI config space, AER registers are dumped
 * as logs for further analysis.
 */
-   pe_op.op = VFIO_EEH_PE_UNFREEZE_IO;
-   ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+   pe_op->op = VFIO_EEH_PE_UNFREEZE_IO;
+   ioctl(container, VFIO_EEH_PE_OP, pe_op);
 
/*
 * Issue PE reset: hot or fundamental reset. Usually, hot reset
 * is enough. However, the firmware of some PCI adapters would
 * require fundamental reset.
 */
-   pe_op.op = VFIO_EEH_PE_RESET_HOT;
-   ioctl(container, VFIO_EEH_PE_OP, &pe_op);
-   pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE;
-   ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+   pe_op->op = VFIO_EEH_PE_RESET_HOT;
+   ioctl(container, VFIO_EEH_PE_OP, pe_op);
+   pe_op->op = VFIO_EEH_PE_RESET_DEACTIVATE;
+   ioctl(container, VFIO_EEH_PE_OP, pe_op);
 
/* Configure the PCI bridges for the affected PE */
-   pe_op.op = VFIO_EEH_PE_CONFIGURE;
-   ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+   pe_op->op = VFIO_EEH_PE_CONFIGURE;
+   ioctl(container, VFIO_EEH_PE_OP, pe_op);
 
/* Restored state we saved at initialization time. pci_restore_state()
 * is good enough as an example.
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 5fa42db..4f1ebc1 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -85,6 +85,20 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
case VFIO_EEH_PE_CONFIGURE:
ret = eeh_pe_configure(pe);
break;
+   case VFIO_EEH_PE_INJECT_ERR: 

[PATCH 1/2] powerpc/eeh: Introduce eeh_pe_inject_err()

2015-03-10 Thread Gavin Shan
The patch defines PCI error types and functions in eeh.h and
exports the function eeh_pe_inject_err(), which will be called by
the VFIO driver to inject the specified PCI error into the indicated
PE for testing purposes.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/include/asm/eeh.h | 24 
 arch/powerpc/kernel/eeh.c  | 63 ++
 2 files changed, 87 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 9de87ce..eb20c62 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -196,6 +196,28 @@ enum {
 #define EEH_RESET_COMPLETE 4   /* PHB complete reset   */
 #define EEH_LOG_TEMP   1   /* EEH temporary error log  */
 #define EEH_LOG_PERM   2   /* EEH permanent error log  */
+#define EEH_ERR_TYPE_32            0   /* 32-bits PCI error    */
+#define EEH_ERR_TYPE_64            1   /* 64-bits PCI error    */
+#define EEH_ERR_FUNC_LD_MEM_ADDR   0   /* Memory load  */
+#define EEH_ERR_FUNC_LD_MEM_DATA   1
+#define EEH_ERR_FUNC_LD_IO_ADDR    2   /* IO load  */
+#define EEH_ERR_FUNC_LD_IO_DATA    3
+#define EEH_ERR_FUNC_LD_CFG_ADDR   4   /* Config load  */
+#define EEH_ERR_FUNC_LD_CFG_DATA   5
+#define EEH_ERR_FUNC_ST_MEM_ADDR   6   /* Memory store */
+#define EEH_ERR_FUNC_ST_MEM_DATA   7
+#define EEH_ERR_FUNC_ST_IO_ADDR    8   /* IO store */
+#define EEH_ERR_FUNC_ST_IO_DATA    9
+#define EEH_ERR_FUNC_ST_CFG_ADDR   10  /* Config store */
+#define EEH_ERR_FUNC_ST_CFG_DATA   11
+#define EEH_ERR_FUNC_DMA_RD_ADDR   12  /* DMA read */
+#define EEH_ERR_FUNC_DMA_RD_DATA   13
+#define EEH_ERR_FUNC_DMA_RD_MASTER 14
+#define EEH_ERR_FUNC_DMA_RD_TARGET 15
+#define EEH_ERR_FUNC_DMA_WR_ADDR   16  /* DMA write*/
+#define EEH_ERR_FUNC_DMA_WR_DATA   17
+#define EEH_ERR_FUNC_DMA_WR_MASTER 18
+#define EEH_ERR_FUNC_DMA_WR_TARGET 19
 
 struct eeh_ops {
char *name;
@@ -296,6 +318,8 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option);
 int eeh_pe_get_state(struct eeh_pe *pe);
 int eeh_pe_reset(struct eeh_pe *pe, int option);
 int eeh_pe_configure(struct eeh_pe *pe);
+int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
+ unsigned long addr, unsigned long mask);
 
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 60a0f15ce..dbab1a4 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1657,6 +1657,69 @@ int eeh_pe_configure(struct eeh_pe *pe)
 }
 EXPORT_SYMBOL_GPL(eeh_pe_configure);
 
+/**
+ * eeh_pe_inject_err - Inject the specified PCI error to the indicated PE
+ * @pe: the indicated PE
+ * @type: error type
+ * @func: error function
+ * @addr: address
+ * @mask: address mask
+ *
+ * The routine is called to inject the specified PCI error, which
+ * is determined by @type and @func, to the indicated PE for
+ * testing purposes.
+ */
+int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
+ unsigned long addr, unsigned long mask)
+{
+   /* Invalid PE ? */
+   if (!pe)
+   return -ENODEV;
+
+   /* Unsupported operation ? */
+   if (!eeh_ops || !eeh_ops->err_inject)
+   return -ENOENT;
+
+   /* Check on PCI error type */
+   switch (type) {
+   case EEH_ERR_TYPE_32:
+   case EEH_ERR_TYPE_64:
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   /* Check on PCI error function */
+   switch (func) {
+   case EEH_ERR_FUNC_LD_MEM_ADDR:
+   case EEH_ERR_FUNC_LD_MEM_DATA:
+   case EEH_ERR_FUNC_LD_IO_ADDR:
+   case EEH_ERR_FUNC_LD_IO_DATA:
+   case EEH_ERR_FUNC_LD_CFG_ADDR:
+   case EEH_ERR_FUNC_LD_CFG_DATA:
+   case EEH_ERR_FUNC_ST_MEM_ADDR:
+   case EEH_ERR_FUNC_ST_MEM_DATA:
+   case EEH_ERR_FUNC_ST_IO_ADDR:
+   case EEH_ERR_FUNC_ST_IO_DATA:
+   case EEH_ERR_FUNC_ST_CFG_ADDR:
+   case EEH_ERR_FUNC_ST_CFG_DATA:
+   case EEH_ERR_FUNC_DMA_RD_ADDR:
+   case EEH_ERR_FUNC_DMA_RD_DATA:
+   case EEH_ERR_FUNC_DMA_RD_MASTER:
+   case EEH_ERR_FUNC_DMA_RD_TARGET:
+   case EEH_ERR_FUNC_DMA_WR_ADDR:
+   case EEH_ERR_FUNC_DMA_WR_DATA:
+   case EEH_ERR_FUNC_DMA_WR_MASTER:
+   case EEH_ERR_FUNC_DMA_WR_TARGET:
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   return eeh_ops->err_inject(pe, type, func, addr, mask);
+}
+EXPORT_SYMBOL_GPL(eeh_pe_inject_err);
+
 static int proc_eeh_show(struct seq_file *m, void *v)
 {
if (!eeh_enabled()) {
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/2] drivers/vfio: Support EEH error injection

2015-03-11 Thread Gavin Shan
On Thu, Mar 12, 2015 at 11:57:21AM +1100, David Gibson wrote:
>On Wed, Mar 11, 2015 at 05:34:11PM +1100, Gavin Shan wrote:
>> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
>> to inject the specified EEH error, which is represented by
>> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
>> 
>> Signed-off-by: Gavin Shan 
>> ---
>>  Documentation/vfio.txt| 47 
>> ++-
>>  drivers/vfio/vfio_spapr_eeh.c | 14 +
>>  include/uapi/linux/vfio.h | 34 ++-
>>  3 files changed, 80 insertions(+), 15 deletions(-)
>> 
>> diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
>> index 96978ec..2e7f736 100644
>> --- a/Documentation/vfio.txt
>> +++ b/Documentation/vfio.txt
>> @@ -328,7 +328,13 @@ So 4 additional ioctls have been added:
>>  
>>  The code flow from the example above should be slightly changed:
>>  
>> -struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
>> +struct vfio_eeh_pe_op *pe_op;
>> +struct vfio_eeh_pe_err *pe_err;
>> +
>> +pe_op = malloc(sizeof(*pe_op) + sizeof(*pe_err));
>> +pe_err = (void *)pe_op + sizeof(*pe_op);
>> +pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
>
>Surely that argsz can't be correct for most of the operations.  The
>extended structure should only be there for the error inject ioctl,
>yes?
>

Right, that argsz isn't accurate for most of the operations. It still
works because the kernel has the check "expected_argsz < passed_argsz",
not "expected_argsz == passed_argsz". However, I'll fix it as follows
to avoid confusion after collecting more comments:

struct vfio_eeh_pe_op *pe_op;
struct vfio_eeh_pe_err *pe_err;

/* For all cases except error injection */
pe_op = malloc(sizeof(*pe_op));
pe_op->argsz = sizeof(*pe_op);

/* For error injection case here */
pe_op = realloc(pe_op, sizeof(*pe_op) + sizeof(*pe_err));
pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
pe_err = (void *)pe_op + sizeof(*pe_op);

Thanks,
Gavin

>-- 
>David Gibson   | I'll have my music baroque, and my code
>david AT gibson.dropbear.id.au | minimalist, thank you.  NOT _the_ _other_
>   | _way_ _around_!
>http://www.ozlabs.org/~dgibson


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] drivers/vfio: Support EEH error injection

2015-03-11 Thread Gavin Shan
On Thu, Mar 12, 2015 at 03:21:29PM +1100, David Gibson wrote:
>On Thu, Mar 12, 2015 at 02:16:42PM +1100, Gavin Shan wrote:
>> On Thu, Mar 12, 2015 at 11:57:21AM +1100, David Gibson wrote:
>> >On Wed, Mar 11, 2015 at 05:34:11PM +1100, Gavin Shan wrote:
>> >> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
>> >> to inject the specified EEH error, which is represented by
>> >> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
>> >> 
>> >> Signed-off-by: Gavin Shan 
>> >> ---
>> >>  Documentation/vfio.txt| 47 
>> >> ++-
>> >>  drivers/vfio/vfio_spapr_eeh.c | 14 +
>> >>  include/uapi/linux/vfio.h | 34 ++-
>> >>  3 files changed, 80 insertions(+), 15 deletions(-)
>> >> 
>> >> diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
>> >> index 96978ec..2e7f736 100644
>> >> --- a/Documentation/vfio.txt
>> >> +++ b/Documentation/vfio.txt
>> >> @@ -328,7 +328,13 @@ So 4 additional ioctls have been added:
>> >>  
>> >>  The code flow from the example above should be slightly changed:
>> >>  
>> >> - struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
>> >> + struct vfio_eeh_pe_op *pe_op;
>> >> + struct vfio_eeh_pe_err *pe_err;
>> >> +
>> >> + pe_op = malloc(sizeof(*pe_op) + sizeof(*pe_err));
>> >> + pe_err = (void *)pe_op + sizeof(*pe_op);
>> >> + pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
>> >
>> >Surely that argsz can't be correct for most of the operations.  The
>> >extended structure should only be there for the error inject ioctl,
>> >yes?
>> >
>> 
>> argsz isn't appropriate for most cases because kernel has the check
>> "expected_argsz < passed_argsz", not "expected_argsz ==
>> passed_argsz".
>
>It works for now, but if any of those calls was extended with more
>data, it would break horribly.  By setting the argsz greater than
>necessary, you're effectively passing uninitialized data to the
>ioctl().  At the moment, the ioctl() ignores it, but the whole point
>of the argsz value is that in the future, it might not.
>

Thank you for the further explanation. I agree that it's worth passing the
precise argument size. I'll fix it as below in the next revision:

>> However, I'll fix it as follows to avoid confusion after collecting
>> more comments:
>> 
>>  struct vfio_eeh_pe_op *pe_op;
>>  struct vfio_eeh_pe_err *pe_err;
>> 
>>  /* For all cases except error injection */
>>  pe_op = malloc(sizeof(*pe_op));
>>  pe_op->argsz = sizeof(*pe_op);
>> 
>>  /* For error injection case here */
>>  pe_op = realloc(sizeof(*pe_op) + sizeof(*pe_err));
>>  pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
>>  pe_err = (void *)pe_op + sizeof(*pe_op);
>> 

Thanks,
Gavin

>
>-- 
>David Gibson   | I'll have my music baroque, and my code
>david AT gibson.dropbear.id.au | minimalist, thank you.  NOT _the_ _other_
>   | _way_ _around_!
>http://www.ozlabs.org/~dgibson


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] powerpc/eeh: Introduce eeh_pe_inject_err()

2015-03-15 Thread Gavin Shan
On Fri, Mar 13, 2015 at 02:28:33PM -0600, Alex Williamson wrote:
>On Wed, 2015-03-11 at 17:34 +1100, Gavin Shan wrote:
>> The patch defines PCI error types and functions in eeh.h and
>> exports function eeh_pe_inject_err(), which will be called by
>> VFIO driver to inject the specified PCI error to the indicated
>> PE for testing purpose.
>> 
>> Signed-off-by: Gavin Shan 
>> ---
>>  arch/powerpc/include/asm/eeh.h | 24 
>>  arch/powerpc/kernel/eeh.c  | 63 
>> ++
>>  2 files changed, 87 insertions(+)
>> 
>> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
>> index 9de87ce..eb20c62 100644
>> --- a/arch/powerpc/include/asm/eeh.h
>> +++ b/arch/powerpc/include/asm/eeh.h
>> @@ -196,6 +196,28 @@ enum {
>>  #define EEH_RESET_COMPLETE  4   /* PHB complete reset   */
>>  #define EEH_LOG_TEMP1   /* EEH temporary error log  
>> */
>>  #define EEH_LOG_PERM2   /* EEH permanent error log  
>> */
>> +#define EEH_ERR_TYPE_32 0   /* 32-bits PCI error*/
>> +#define EEH_ERR_TYPE_64 1   /* 64-bits PCI error*/
>> +#define EEH_ERR_FUNC_LD_MEM_ADDR0   /* Memory load  */
>> +#define EEH_ERR_FUNC_LD_MEM_DATA1
>> +#define EEH_ERR_FUNC_LD_IO_ADDR 2   /* IO load  */
>> +#define EEH_ERR_FUNC_LD_IO_DATA 3
>> +#define EEH_ERR_FUNC_LD_CFG_ADDR4   /* Config load  */
>> +#define EEH_ERR_FUNC_LD_CFG_DATA5
>> +#define EEH_ERR_FUNC_ST_MEM_ADDR6   /* Memory store */
>> +#define EEH_ERR_FUNC_ST_MEM_DATA7
>> +#define EEH_ERR_FUNC_ST_IO_ADDR 8   /* IO store */
>> +#define EEH_ERR_FUNC_ST_IO_DATA 9
>> +#define EEH_ERR_FUNC_ST_CFG_ADDR10  /* Config store */
>> +#define EEH_ERR_FUNC_ST_CFG_DATA11
>> +#define EEH_ERR_FUNC_DMA_RD_ADDR12  /* DMA read */
>> +#define EEH_ERR_FUNC_DMA_RD_DATA13
>> +#define EEH_ERR_FUNC_DMA_RD_MASTER  14
>> +#define EEH_ERR_FUNC_DMA_RD_TARGET  15
>> +#define EEH_ERR_FUNC_DMA_WR_ADDR16  /* DMA write*/
>> +#define EEH_ERR_FUNC_DMA_WR_DATA17
>> +#define EEH_ERR_FUNC_DMA_WR_MASTER  18
>> +#define EEH_ERR_FUNC_DMA_WR_TARGET  19
>>  
>>  struct eeh_ops {
>>  char *name;
>> @@ -296,6 +318,8 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option);
>>  int eeh_pe_get_state(struct eeh_pe *pe);
>>  int eeh_pe_reset(struct eeh_pe *pe, int option);
>>  int eeh_pe_configure(struct eeh_pe *pe);
>> +int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
>> +  unsigned long addr, unsigned long mask);
>>  
>>  /**
>>   * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
>> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
>> index 60a0f15ce..dbab1a4 100644
>> --- a/arch/powerpc/kernel/eeh.c
>> +++ b/arch/powerpc/kernel/eeh.c
>> @@ -1657,6 +1657,69 @@ int eeh_pe_configure(struct eeh_pe *pe)
>>  }
>>  EXPORT_SYMBOL_GPL(eeh_pe_configure);
>>  
>> +/**
>> + * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE
>> + * @pe: the indicated PE
>> + * @type: error type
>> + * @function: error function
>> + * @addr: address
>> + * @mask: address mask
>> + *
>> + * The routine is called to inject the specified PCI error, which
>> + * is determined by @type and @function, to the indicated PE for
>> + * testing purpose.
>> + */
>> +int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
>> +  unsigned long addr, unsigned long mask)
>> +{
>> +/* Invalid PE ? */
>> +if (!pe)
>> +return -ENODEV;
>> +
>> +/* Unsupported operation ? */
>> +if (!eeh_ops || !eeh_ops->err_inject)
>> +return -ENOENT;
>> +
>> +/* Check on PCI error type */
>> +switch (type) {
>> +case EEH_ERR_TYPE_32:
>> +case EEH_ERR_TYPE_64:
>> +break;
>> +default:
>> +return -EINVAL;
>> +}
>> +
>> +/* Check on PCI error function */
>> +switch (func) {
>> +case EEH_ERR_FUNC_LD_MEM_ADDR:
>> +case EEH_ERR_FUNC_LD_MEM_DATA:
>> +case EEH_ERR_FUNC_LD_IO_ADDR:
>> +case EEH_ERR_FUNC_LD_IO_DATA:
>> +case EEH_ERR_FUNC_LD_CFG_ADDR:
>> +case EEH_ERR_FUNC_LD_CFG_DATA:
>> +case EEH_ERR_FUNC_ST_MEM_ADDR:
>> +case EEH_ERR_FUNC_ST_MEM_DAT

Re: [PATCH 2/2] drivers/vfio: Support EEH error injection

2015-03-15 Thread Gavin Shan
On Fri, Mar 13, 2015 at 02:28:09PM -0600, Alex Williamson wrote:
>On Thu, 2015-03-12 at 15:21 +1100, David Gibson wrote:
>> On Thu, Mar 12, 2015 at 02:16:42PM +1100, Gavin Shan wrote:
>> > On Thu, Mar 12, 2015 at 11:57:21AM +1100, David Gibson wrote:
>> > >On Wed, Mar 11, 2015 at 05:34:11PM +1100, Gavin Shan wrote:
>> > >> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
>> > >> to inject the specified EEH error, which is represented by
>> > >> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
>> > >> 
>> > >> Signed-off-by: Gavin Shan 
>> > >> ---
>> > >>  Documentation/vfio.txt| 47 
>> > >> ++-
>> > >>  drivers/vfio/vfio_spapr_eeh.c | 14 +
>> > >>  include/uapi/linux/vfio.h | 34 ++-
>> > >>  3 files changed, 80 insertions(+), 15 deletions(-)
>> > >> 
>> > >> diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
>> > >> index 96978ec..2e7f736 100644
>> > >> --- a/Documentation/vfio.txt
>> > >> +++ b/Documentation/vfio.txt
>> > >> @@ -328,7 +328,13 @@ So 4 additional ioctls have been added:
>> > >>  
>> > >>  The code flow from the example above should be slightly changed:
>> > >>  
>> > >> -   struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags 
>> > >> = 0 };
>> > >> +   struct vfio_eeh_pe_op *pe_op;
>> > >> +   struct vfio_eeh_pe_err *pe_err;
>> > >> +
>> > >> +   pe_op = malloc(sizeof(*pe_op) + sizeof(*pe_err));
>> > >> +   pe_err = (void *)pe_op + sizeof(*pe_op);
>> > >> +   pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
>> > >
>> > >Surely that argsz can't be correct for most of the operations.  The
>> > >extended structure should only be there for the error inject ioctl,
>> > >yes?
>> > >
>> > 
>> > argsz isn't appropriate for most cases because kernel has the check
>> > "expected_argsz < passed_argsz", not "expected_argsz ==
>> > passed_argsz".
>> 
>> It works for now, but if any of those calls was extended with more
>> data, it would break horribly.  By setting the argsz greater than
>> necessary, you're effectively passing uninitialized data to the
>> ioctl().  At the moment, the ioctl() ignores it, but the whole point
>> of the argsz value is that in the future, it might not.
>
>argsz tells us how much data the user is passing, we're always going to
>need to figure out what the extra data is, so I don't really see the
>point of this objection.  In fact, it might make use of this interface
>quite a bit easier if vfio_eeh_pe_op ended with a union including
>vfio_eeh_pe_err.  op == VFIO_EEH_PE_INJECT_ERR defines that the user has
>passed vfio_eeh_pe_err in the union, other ops may add new unions later.
>Thanks,
>

OK. I'll use the following data structures in the next revision:

struct vfio_eeh_pe_err {
__u32 type;
__u32 func;
__u64 addr;
__u64 mask;
};

struct vfio_eeh_pe_op {
__u32 argsz;
__u32 flags;
__u32 op;
union {
struct vfio_eeh_pe_err err;
};
};

Thanks,
Gavin

>Alex
>
>> > However, I'll fix it as follows to avoid confusion after collecting
>> > more comments:
>> > 
>> >struct vfio_eeh_pe_op *pe_op;
>> >struct vfio_eeh_pe_err *pe_err;
>> > 
>> >/* For all cases except error injection */
>> >pe_op = malloc(sizeof(*pe_op));
>> >pe_op->argsz = sizeof(*pe_op);
>> > 
>> >/* For error injection case here */
>> >pe_op = realloc(sizeof(*pe_op) + sizeof(*pe_err));
>> >pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
>> >pe_err = (void *)pe_op + sizeof(*pe_op);
>> > 
>> > Thanks,
>> > Gavin
>> > 
>> > 
>> > 
>> 
>
>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] drivers/vfio: Support EEH error injection

2015-03-15 Thread Gavin Shan
On Fri, Mar 13, 2015 at 02:35:18PM -0600, Alex Williamson wrote:
>On Wed, 2015-03-11 at 17:34 +1100, Gavin Shan wrote:
>> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
>> to inject the specified EEH error, which is represented by
>> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
>> 
>> Signed-off-by: Gavin Shan 
>> ---
>>  Documentation/vfio.txt| 47 
>> ++-
>>  drivers/vfio/vfio_spapr_eeh.c | 14 +
>>  include/uapi/linux/vfio.h | 34 ++-
>>  3 files changed, 80 insertions(+), 15 deletions(-)
>> 
>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>[snip]
>> @@ -490,6 +499,29 @@ struct vfio_eeh_pe_op {
>>  #define VFIO_EEH_PE_RESET_HOT   6   /* Assert hot reset 
>>  */
>>  #define VFIO_EEH_PE_RESET_FUNDAMENTAL   7   /* Assert fundamental 
>> reset  */
>>  #define VFIO_EEH_PE_CONFIGURE   8   /* PE configuration 
>>  */
>> +#define VFIO_EEH_PE_INJECT_ERR  9   /* Inject EEH error 
>>  */
>> +#define  VFIO_EEH_ERR_TYPE_32   0   /* 32-bits EEH error 
>> type*/
>> +#define  VFIO_EEH_ERR_TYPE_64   1   /* 64-bits EEH error 
>> type*/
>> +#define  VFIO_EEH_ERR_FUNC_LD_MEM_ADDR  0   /* Memory load  
>> */
>> +#define  VFIO_EEH_ERR_FUNC_LD_MEM_DATA  1
>> +#define  VFIO_EEH_ERR_FUNC_LD_IO_ADDR   2   /* IO load  
>> */
>> +#define  VFIO_EEH_ERR_FUNC_LD_IO_DATA   3
>> +#define  VFIO_EEH_ERR_FUNC_LD_CFG_ADDR  4   /* Config load  
>> */
>> +#define  VFIO_EEH_ERR_FUNC_LD_CFG_DATA  5
>> +#define  VFIO_EEH_ERR_FUNC_ST_MEM_ADDR  6   /* Memory store 
>> */
>> +#define  VFIO_EEH_ERR_FUNC_ST_MEM_DATA  7
>> +#define  VFIO_EEH_ERR_FUNC_ST_IO_ADDR   8   /* IO store 
>> */
>> +#define  VFIO_EEH_ERR_FUNC_ST_IO_DATA   9
>> +#define  VFIO_EEH_ERR_FUNC_ST_CFG_ADDR  10  /* Config store 
>> */
>> +#define  VFIO_EEH_ERR_FUNC_ST_CFG_DATA  11
>> +#define  VFIO_EEH_ERR_FUNC_DMA_RD_ADDR  12  /* DMA read 
>> */
>> +#define  VFIO_EEH_ERR_FUNC_DMA_RD_DATA  13
>> +#define  VFIO_EEH_ERR_FUNC_DMA_RD_MASTER14
>> +#define  VFIO_EEH_ERR_FUNC_DMA_RD_TARGET15
>> +#define  VFIO_EEH_ERR_FUNC_DMA_WR_ADDR  16  /* DMA write
>> */
>> +#define  VFIO_EEH_ERR_FUNC_DMA_WR_DATA  17
>> +#define  VFIO_EEH_ERR_FUNC_DMA_WR_MASTER18
>> +#define  VFIO_EEH_ERR_FUNC_DMA_WR_TARGET19
>
>This data duplication from patch 1/2 is kind of concerning.  In one case
>we're adding to arch/powerpc/include/asm/eeh.h, which is a kernel
>internal interface and entirely changeable, in the other we're matching
>those current definitions in uapi, which needs to be stable.  Are these
>indexes part of a spec that we can rely on them being stable or do we
>need some sort of translation layer to go from the vfio uapi defined
>value to the kernel internal version?  Thanks,
>

All those constants are defined by the PAPR specification, so neither the
definitions here nor those in PATCH[1/2] are expected to change.

Thanks,
Gavin

>Alex
>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 1/2] powerpc/eeh: Introduce eeh_pe_inject_err()

2015-03-16 Thread Gavin Shan
The patch defines PCI error types and functions in eeh.h and
exports function eeh_pe_inject_err(), which will be called by
VFIO driver to inject the specified PCI error to the indicated
PE for testing purpose.

Signed-off-by: Gavin Shan 
---
v2: Use EEH_ERR_FUNC_{MIN,MAX} to validate PCI error function
---
 arch/powerpc/include/asm/eeh.h | 26 ++
 arch/powerpc/kernel/eeh.c  | 40 
 2 files changed, 66 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 55abfd0..44366fa 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -195,6 +195,30 @@ enum {
 #define EEH_RESET_FUNDAMENTAL  3   /* Fundamental reset*/
 #define EEH_LOG_TEMP   1   /* EEH temporary error log  */
 #define EEH_LOG_PERM   2   /* EEH permanent error log  */
+#define EEH_ERR_TYPE_32    0   /* 32-bits PCI error    */
+#define EEH_ERR_TYPE_64    1   /* 64-bits PCI error    */
+#define EEH_ERR_FUNC_MIN   0
+#define EEH_ERR_FUNC_LD_MEM_ADDR   0   /* Memory load  */
+#define EEH_ERR_FUNC_LD_MEM_DATA   1
+#define EEH_ERR_FUNC_LD_IO_ADDR2   /* IO load  */
+#define EEH_ERR_FUNC_LD_IO_DATA3
+#define EEH_ERR_FUNC_LD_CFG_ADDR   4   /* Config load  */
+#define EEH_ERR_FUNC_LD_CFG_DATA   5
+#define EEH_ERR_FUNC_ST_MEM_ADDR   6   /* Memory store */
+#define EEH_ERR_FUNC_ST_MEM_DATA   7
+#define EEH_ERR_FUNC_ST_IO_ADDR8   /* IO store */
+#define EEH_ERR_FUNC_ST_IO_DATA9
+#define EEH_ERR_FUNC_ST_CFG_ADDR   10  /* Config store */
+#define EEH_ERR_FUNC_ST_CFG_DATA   11
+#define EEH_ERR_FUNC_DMA_RD_ADDR   12  /* DMA read */
+#define EEH_ERR_FUNC_DMA_RD_DATA   13
+#define EEH_ERR_FUNC_DMA_RD_MASTER 14
+#define EEH_ERR_FUNC_DMA_RD_TARGET 15
+#define EEH_ERR_FUNC_DMA_WR_ADDR   16  /* DMA write*/
+#define EEH_ERR_FUNC_DMA_WR_DATA   17
+#define EEH_ERR_FUNC_DMA_WR_MASTER 18
+#define EEH_ERR_FUNC_DMA_WR_TARGET 19
+#define EEH_ERR_FUNC_MAX   19
 
 struct eeh_ops {
char *name;
@@ -295,6 +319,8 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option);
 int eeh_pe_get_state(struct eeh_pe *pe);
 int eeh_pe_reset(struct eeh_pe *pe, int option);
 int eeh_pe_configure(struct eeh_pe *pe);
+int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
+ unsigned long addr, unsigned long mask);
 
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 19a897c..55083e5 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1651,6 +1651,46 @@ int eeh_pe_configure(struct eeh_pe *pe)
 }
 EXPORT_SYMBOL_GPL(eeh_pe_configure);
 
+/**
+ * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE
+ * @pe: the indicated PE
+ * @type: error type
+ * @function: error function
+ * @addr: address
+ * @mask: address mask
+ *
+ * The routine is called to inject the specified PCI error, which
+ * is determined by @type and @function, to the indicated PE for
+ * testing purpose.
+ */
+int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
+ unsigned long addr, unsigned long mask)
+{
+   /* Invalid PE ? */
+   if (!pe)
+   return -ENODEV;
+
+   /* Unsupported operation ? */
+   if (!eeh_ops || !eeh_ops->err_inject)
+   return -ENOENT;
+
+   /* Check on PCI error type */
+   switch (type) {
+   case EEH_ERR_TYPE_32:
+   case EEH_ERR_TYPE_64:
+   break;
+   default:
+   return -EINVAL;
+   }
+
+   /* Check on PCI error function */
+   if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
+   return -EINVAL;
+
+   return eeh_ops->err_inject(pe, type, func, addr, mask);
+}
+EXPORT_SYMBOL_GPL(eeh_pe_inject_err);
+
 static int proc_eeh_show(struct seq_file *m, void *v)
 {
if (!eeh_enabled()) {
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 2/2] drivers/vfio: Support EEH error injection

2015-03-16 Thread Gavin Shan
The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
to inject the specified EEH error, which is represented by
(struct vfio_eeh_pe_err), to the indicated PE for testing purpose.

Signed-off-by: Gavin Shan 
---
v2: Put additional arguments for error injection to union
---
 Documentation/vfio.txt| 12 
 drivers/vfio/vfio_spapr_eeh.c | 10 ++
 include/uapi/linux/vfio.h | 36 +++-
 3 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index 96978ec..c6e11a3 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -385,6 +385,18 @@ The code flow from the example above should be slightly 
changed:
 

 
+   /* Inject EEH error, which is expected to be caused by 32-bits
+* config load.
+*/
+   pe_op.op = VFIO_EEH_PE_INJECT_ERR;
+   pe_op.err.type = VFIO_EEH_ERR_TYPE_32;
+   pe_op.err.func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR;
+   pe_op.err.addr = 0ul;
+   pe_op.err.mask = 0ul;
+   ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+   
+
/* When 0xFF's returned from reading PCI config space or IO BARs
 * of the PCI device. Check the PE's state to see if that has been
 * frozen.
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 5fa42db..25ca634 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
case VFIO_EEH_PE_CONFIGURE:
ret = eeh_pe_configure(pe);
break;
+   case VFIO_EEH_PE_INJECT_ERR:
+   if (op.argsz < sizeof(struct vfio_eeh_pe_op))
+   return -EINVAL;
+   if (copy_from_user(&op, (void __user *)arg,
+  sizeof(struct vfio_eeh_pe_op)))
+   return -EFAULT;
+
+   ret = eeh_pe_inject_err(pe, op.err.type, op.err.func,
+   op.err.addr, op.err.mask);
+   break;
default:
ret = -EINVAL;
}
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 82889c3..f68e962 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -468,12 +468,23 @@ struct vfio_iommu_spapr_tce_info {
  * - unfreeze IO/DMA for frozen PE;
  * - read PE state;
  * - reset PE;
- * - configure PE.
+ * - configure PE;
+ * - inject EEH error.
  */
+struct vfio_eeh_pe_err {
+   __u32 type;
+   __u32 func;
+   __u64 addr;
+   __u64 mask;
+};
+
 struct vfio_eeh_pe_op {
__u32 argsz;
__u32 flags;
__u32 op;
+   union {
+   struct vfio_eeh_pe_err err;
+   };
 };
 
 #define VFIO_EEH_PE_DISABLE0   /* Disable EEH functionality */
@@ -490,6 +501,29 @@ struct vfio_eeh_pe_op {
 #define VFIO_EEH_PE_RESET_HOT  6   /* Assert hot reset  */
 #define VFIO_EEH_PE_RESET_FUNDAMENTAL  7   /* Assert fundamental reset  */
 #define VFIO_EEH_PE_CONFIGURE  8   /* PE configuration  */
+#define VFIO_EEH_PE_INJECT_ERR 9   /* Inject EEH error  */
+#define  VFIO_EEH_ERR_TYPE_32  0   /* 32-bits EEH error type*/
+#define  VFIO_EEH_ERR_TYPE_64  1   /* 64-bits EEH error type*/
+#define  VFIO_EEH_ERR_FUNC_LD_MEM_ADDR 0   /* Memory load  */
+#define  VFIO_EEH_ERR_FUNC_LD_MEM_DATA 1
+#define  VFIO_EEH_ERR_FUNC_LD_IO_ADDR  2   /* IO load  */
+#define  VFIO_EEH_ERR_FUNC_LD_IO_DATA  3
+#define  VFIO_EEH_ERR_FUNC_LD_CFG_ADDR 4   /* Config load  */
+#define  VFIO_EEH_ERR_FUNC_LD_CFG_DATA 5
+#define  VFIO_EEH_ERR_FUNC_ST_MEM_ADDR 6   /* Memory store */
+#define  VFIO_EEH_ERR_FUNC_ST_MEM_DATA 7
+#define  VFIO_EEH_ERR_FUNC_ST_IO_ADDR  8   /* IO store */
+#define  VFIO_EEH_ERR_FUNC_ST_IO_DATA  9
+#define  VFIO_EEH_ERR_FUNC_ST_CFG_ADDR 10  /* Config store */
+#define  VFIO_EEH_ERR_FUNC_ST_CFG_DATA 11
+#define  VFIO_EEH_ERR_FUNC_DMA_RD_ADDR 12  /* DMA read */
+#define  VFIO_EEH_ERR_FUNC_DMA_RD_DATA 13
+#define  VFIO_EEH_ERR_FUNC_DMA_RD_MASTER   14
+#define  VFIO_EEH_ERR_FUNC_DMA_RD_TARGET   15
+#define  VFIO_EEH_ERR_FUNC_DMA_WR_ADDR 16  /* DMA write*/
+#define  VFIO_EEH_ERR_FUNC_DMA_WR_DATA 17
+#define  VFIO_EEH_ERR_FUNC_DMA_WR_MASTER   18
+#define  VFIO_EEH_ERR_FUNC_DMA_WR_TARGET   19
 
 #define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21)
 
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v2 2/2] drivers/vfio: Support EEH error injection

2015-03-17 Thread Gavin Shan
On Tue, Mar 17, 2015 at 02:45:49PM -0600, Alex Williamson wrote:
>On Mon, 2015-03-16 at 18:01 +1100, Gavin Shan wrote:
>> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
>> to inject the specified EEH error, which is represented by
>> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
>> 
>> Signed-off-by: Gavin Shan 
>> ---
>> v2: Put additional arguments for error injection to union
>> ---
>>  Documentation/vfio.txt| 12 
>>  drivers/vfio/vfio_spapr_eeh.c | 10 ++
>>  include/uapi/linux/vfio.h | 36 +++-
>>  3 files changed, 57 insertions(+), 1 deletion(-)
>> 
>> diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
>> index 96978ec..c6e11a3 100644
>> --- a/Documentation/vfio.txt
>> +++ b/Documentation/vfio.txt
>> @@ -385,6 +385,18 @@ The code flow from the example above should be slightly 
>> changed:
>>  
>>  
>>  
>> +/* Inject EEH error, which is expected to be caused by 32-bits
>> + * config load.
>> + */
>> +pe_op.op = VFIO_EEH_PE_INJECT_ERR;
>> +pe_op.err.type = VFIO_EEH_ERR_TYPE_32;
>> +pe_op.err.func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR;
>> +pe_op.err.addr = 0ul;
>> +pe_op.err.mask = 0ul;
>> +ioctl(container, VFIO_EEH_PE_OP, &pe_op);
>> +
>> +
>> +
>>  /* When 0xFF's returned from reading PCI config space or IO BARs
>>   * of the PCI device. Check the PE's state to see if that has been
>>   * frozen.
>> diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
>> index 5fa42db..25ca634 100644
>> --- a/drivers/vfio/vfio_spapr_eeh.c
>> +++ b/drivers/vfio/vfio_spapr_eeh.c
>> @@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
>>  case VFIO_EEH_PE_CONFIGURE:
>>  ret = eeh_pe_configure(pe);
>>  break;
>> +case VFIO_EEH_PE_INJECT_ERR:
>> +if (op.argsz < sizeof(struct vfio_eeh_pe_op))
>
>This will need to be updated if vfio_eeh_pe_op ever gets updated again,
>why not just use offsetofend() now and avoid that future hassle and
>breakage.
>

Good point. I'll update it to use
"minsz = offsetofend(struct vfio_eeh_pe_op, err.mask)", and then use
"minsz" for both the parameter check and the memory copy.

>> +return -EINVAL;
>> +if (copy_from_user(&op, (void __user *)arg,
>> +   sizeof(struct vfio_eeh_pe_op)))
>
>And here.
>

As above.

>BTW, please use cover letters
>

Sure, thanks for review!

Thanks,
Gavin

>> +return -EFAULT;
>> +
>> +ret = eeh_pe_inject_err(pe, op.err.type, op.err.func,
>> +op.err.addr, op.err.mask);
>> +break;
>>  default:
>>  ret = -EINVAL;
>>  }
>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>> index 82889c3..f68e962 100644
>> --- a/include/uapi/linux/vfio.h
>> +++ b/include/uapi/linux/vfio.h
>> @@ -468,12 +468,23 @@ struct vfio_iommu_spapr_tce_info {
>>   * - unfreeze IO/DMA for frozen PE;
>>   * - read PE state;
>>   * - reset PE;
>> - * - configure PE.
>> + * - configure PE;
>> + * - inject EEH error.
>>   */
>> +struct vfio_eeh_pe_err {
>> +__u32 type;
>> +__u32 func;
>> +__u64 addr;
>> +__u64 mask;
>> +};
>> +
>>  struct vfio_eeh_pe_op {
>>  __u32 argsz;
>>  __u32 flags;
>>  __u32 op;
>> +union {
>> +struct vfio_eeh_pe_err err;
>> +};
>>  };
>>  
>>  #define VFIO_EEH_PE_DISABLE 0   /* Disable EEH functionality */
>> @@ -490,6 +501,29 @@ struct vfio_eeh_pe_op {
>>  #define VFIO_EEH_PE_RESET_HOT   6   /* Assert hot reset 
>>  */
>>  #define VFIO_EEH_PE_RESET_FUNDAMENTAL   7   /* Assert fundamental 
>> reset  */
>>  #define VFIO_EEH_PE_CONFIGURE   8   /* PE configuration 
>>  */
>> +#define VFIO_EEH_PE_INJECT_ERR  9   /* Inject EEH error 
>>  */
>> +#define  VFIO_EEH_ERR_TYPE_32   0   /* 32-bits EEH error 
>> type*/
>> +#define  VFIO_EEH_ERR_TYPE_64   1   /* 64-bits EEH error 
>> type*/

[PATCH v3 2/2] drivers/vfio: Support EEH error injection

2015-03-20 Thread Gavin Shan
The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
to inject the specified EEH error, which is represented by
(struct vfio_eeh_pe_err), to the indicated PE for testing purpose.

Signed-off-by: Gavin Shan 
---
 Documentation/vfio.txt| 12 
 drivers/vfio/vfio_spapr_eeh.c | 10 ++
 include/uapi/linux/vfio.h | 36 +++-
 3 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index 96978ec..c6e11a3 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -385,6 +385,18 @@ The code flow from the example above should be slightly 
changed:
 

 
+   /* Inject EEH error, which is expected to be caused by 32-bits
+* config load.
+*/
+   pe_op.op = VFIO_EEH_PE_INJECT_ERR;
+   pe_op.err.type = VFIO_EEH_ERR_TYPE_32;
+   pe_op.err.func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR;
+   pe_op.err.addr = 0ul;
+   pe_op.err.mask = 0ul;
+   ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+   
+
/* When 0xFF's returned from reading PCI config space or IO BARs
 * of the PCI device. Check the PE's state to see if that has been
 * frozen.
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 5fa42db..38edeb4 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
case VFIO_EEH_PE_CONFIGURE:
ret = eeh_pe_configure(pe);
break;
+   case VFIO_EEH_PE_INJECT_ERR:
+   minsz = offsetofend(struct vfio_eeh_pe_op, err.mask);
+   if (op.argsz < minsz)
+   return -EINVAL;
+   if (copy_from_user(&op, (void __user *)arg, minsz))
+   return -EFAULT;
+
+   ret = eeh_pe_inject_err(pe, op.err.type, op.err.func,
+   op.err.addr, op.err.mask);
+   break;
default:
ret = -EINVAL;
}
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 82889c3..f68e962 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -468,12 +468,23 @@ struct vfio_iommu_spapr_tce_info {
  * - unfreeze IO/DMA for frozen PE;
  * - read PE state;
  * - reset PE;
- * - configure PE.
+ * - configure PE;
+ * - inject EEH error.
  */
+struct vfio_eeh_pe_err {
+   __u32 type;
+   __u32 func;
+   __u64 addr;
+   __u64 mask;
+};
+
 struct vfio_eeh_pe_op {
__u32 argsz;
__u32 flags;
__u32 op;
+   union {
+   struct vfio_eeh_pe_err err;
+   };
 };
 
 #define VFIO_EEH_PE_DISABLE0   /* Disable EEH functionality */
@@ -490,6 +501,29 @@ struct vfio_eeh_pe_op {
 #define VFIO_EEH_PE_RESET_HOT  6   /* Assert hot reset  */
 #define VFIO_EEH_PE_RESET_FUNDAMENTAL  7   /* Assert fundamental reset  */
 #define VFIO_EEH_PE_CONFIGURE  8   /* PE configuration  */
+#define VFIO_EEH_PE_INJECT_ERR 9   /* Inject EEH error  */
+#define  VFIO_EEH_ERR_TYPE_32  0   /* 32-bits EEH error type*/
+#define  VFIO_EEH_ERR_TYPE_64  1   /* 64-bits EEH error type*/
+#define  VFIO_EEH_ERR_FUNC_LD_MEM_ADDR 0   /* Memory load  */
+#define  VFIO_EEH_ERR_FUNC_LD_MEM_DATA 1
+#define  VFIO_EEH_ERR_FUNC_LD_IO_ADDR  2   /* IO load  */
+#define  VFIO_EEH_ERR_FUNC_LD_IO_DATA  3
+#define  VFIO_EEH_ERR_FUNC_LD_CFG_ADDR 4   /* Config load  */
+#define  VFIO_EEH_ERR_FUNC_LD_CFG_DATA 5
+#define  VFIO_EEH_ERR_FUNC_ST_MEM_ADDR 6   /* Memory store */
+#define  VFIO_EEH_ERR_FUNC_ST_MEM_DATA 7
+#define  VFIO_EEH_ERR_FUNC_ST_IO_ADDR  8   /* IO store */
+#define  VFIO_EEH_ERR_FUNC_ST_IO_DATA  9
+#define  VFIO_EEH_ERR_FUNC_ST_CFG_ADDR 10  /* Config store */
+#define  VFIO_EEH_ERR_FUNC_ST_CFG_DATA 11
+#define  VFIO_EEH_ERR_FUNC_DMA_RD_ADDR 12  /* DMA read */
+#define  VFIO_EEH_ERR_FUNC_DMA_RD_DATA 13
+#define  VFIO_EEH_ERR_FUNC_DMA_RD_MASTER   14
+#define  VFIO_EEH_ERR_FUNC_DMA_RD_TARGET   15
+#define  VFIO_EEH_ERR_FUNC_DMA_WR_ADDR 16  /* DMA write*/
+#define  VFIO_EEH_ERR_FUNC_DMA_WR_DATA 17
+#define  VFIO_EEH_ERR_FUNC_DMA_WR_MASTER   18
+#define  VFIO_EEH_ERR_FUNC_DMA_WR_TARGET   19
 
 #define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21)
 
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 1/2] powerpc/eeh: Introduce eeh_pe_inject_err()

2015-03-20 Thread Gavin Shan
The patch defines PCI error types and functions in eeh.h and
exports function eeh_pe_inject_err(), which will be called by
VFIO driver to inject the specified PCI error to the indicated
PE for testing purpose.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/include/asm/eeh.h | 26 ++
 arch/powerpc/kernel/eeh.c  | 35 +++
 2 files changed, 61 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 61912fc..85a17de 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -194,6 +194,30 @@ enum {
 #define EEH_RESET_FUNDAMENTAL  3   /* Fundamental reset*/
 #define EEH_LOG_TEMP   1   /* EEH temporary error log  */
 #define EEH_LOG_PERM   2   /* EEH permanent error log  */
+#define EEH_ERR_TYPE_32    0   /* 32-bits PCI error    */
+#define EEH_ERR_TYPE_64    1   /* 64-bits PCI error    */
+#define EEH_ERR_FUNC_MIN   0
+#define EEH_ERR_FUNC_LD_MEM_ADDR   0   /* Memory load  */
+#define EEH_ERR_FUNC_LD_MEM_DATA   1
+#define EEH_ERR_FUNC_LD_IO_ADDR2   /* IO load  */
+#define EEH_ERR_FUNC_LD_IO_DATA3
+#define EEH_ERR_FUNC_LD_CFG_ADDR   4   /* Config load  */
+#define EEH_ERR_FUNC_LD_CFG_DATA   5
+#define EEH_ERR_FUNC_ST_MEM_ADDR   6   /* Memory store */
+#define EEH_ERR_FUNC_ST_MEM_DATA   7
+#define EEH_ERR_FUNC_ST_IO_ADDR8   /* IO store */
+#define EEH_ERR_FUNC_ST_IO_DATA9
+#define EEH_ERR_FUNC_ST_CFG_ADDR   10  /* Config store */
+#define EEH_ERR_FUNC_ST_CFG_DATA   11
+#define EEH_ERR_FUNC_DMA_RD_ADDR   12  /* DMA read */
+#define EEH_ERR_FUNC_DMA_RD_DATA   13
+#define EEH_ERR_FUNC_DMA_RD_MASTER 14
+#define EEH_ERR_FUNC_DMA_RD_TARGET 15
+#define EEH_ERR_FUNC_DMA_WR_ADDR   16  /* DMA write*/
+#define EEH_ERR_FUNC_DMA_WR_DATA   17
+#define EEH_ERR_FUNC_DMA_WR_MASTER 18
+#define EEH_ERR_FUNC_DMA_WR_TARGET 19
+#define EEH_ERR_FUNC_MAX   19
 
 struct eeh_ops {
char *name;
@@ -293,6 +317,8 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option);
 int eeh_pe_get_state(struct eeh_pe *pe);
 int eeh_pe_reset(struct eeh_pe *pe, int option);
 int eeh_pe_configure(struct eeh_pe *pe);
+int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
+ unsigned long addr, unsigned long mask);
 
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 76253eb..daa68a1 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1636,6 +1636,41 @@ int eeh_pe_configure(struct eeh_pe *pe)
 }
 EXPORT_SYMBOL_GPL(eeh_pe_configure);
 
+/**
+ * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE
+ * @pe: the indicated PE
+ * @type: error type
+ * @func: error function
+ * @addr: address
+ * @mask: address mask
+ *
+ * The routine is called to inject the specified PCI error, which
+ * is determined by @type and @func, to the indicated PE for
+ * testing purposes.
+ */
+int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
+ unsigned long addr, unsigned long mask)
+{
+   /* Invalid PE ? */
+   if (!pe)
+   return -ENODEV;
+
+   /* Unsupported operation ? */
+   if (!eeh_ops || !eeh_ops->err_inject)
+   return -ENOENT;
+
+   /* Check on PCI error type */
+   if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
+   return -EINVAL;
+
+   /* Check on PCI error function */
+   if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
+   return -EINVAL;
+
+   return eeh_ops->err_inject(pe, type, func, addr, mask);
+}
+EXPORT_SYMBOL_GPL(eeh_pe_inject_err);
+
 static int proc_eeh_show(struct seq_file *m, void *v)
 {
if (!eeh_enabled()) {
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 0/2] EEH Error Injection Support for VFIO Devices

2015-03-20 Thread Gavin Shan
These two patches are an extension to the EEH support for VFIO PCI devices,
which allows EEH errors to be injected into VFIO PCI devices from userspace
for testing purposes.

Changelog
=
v2 -> v3:
* Use offsetofend(), instead of sizeof(struct vfio_eeh_pe_op)
  to calculate argument buffer size.
v1 -> v2:
* Use EEH_ERR_FUNC_{MIN,MAX} to validate PCI error function.
* Put additional arguments for error injection to union in
  struct vfio_eeh_pe_op.

Gavin Shan (2):
  powerpc/eeh: Introduce eeh_pe_inject_err()
  drivers/vfio: Support EEH error injection

 Documentation/vfio.txt | 12 
 arch/powerpc/include/asm/eeh.h | 26 ++
 arch/powerpc/kernel/eeh.c  | 35 +++
 drivers/vfio/vfio_spapr_eeh.c  | 10 ++
 include/uapi/linux/vfio.h  | 36 +++-
 5 files changed, 118 insertions(+), 1 deletion(-)

-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 2/2] drivers/vfio: Support EEH error injection

2015-03-22 Thread Gavin Shan
On Mon, Mar 23, 2015 at 12:39:45PM +1100, David Gibson wrote:
>On Sat, Mar 21, 2015 at 06:58:45AM +1100, Gavin Shan wrote:
>> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
>> to inject the specified EEH error, which is represented by
>> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
>> 
>> Signed-off-by: Gavin Shan 
>
>Reviewed-by: David Gibson 
>
>> ---
>>  Documentation/vfio.txt| 12 
>>  drivers/vfio/vfio_spapr_eeh.c | 10 ++
>>  include/uapi/linux/vfio.h | 36 +++-
>>  3 files changed, 57 insertions(+), 1 deletion(-)
>> 
>> diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
>> index 96978ec..c6e11a3 100644
>> --- a/Documentation/vfio.txt
>> +++ b/Documentation/vfio.txt
>> @@ -385,6 +385,18 @@ The code flow from the example above should be slightly 
>> changed:
>>  
>>  
>>  
>> +/* Inject EEH error, which is expected to be caused by 32-bits
>> + * config load.
>> + */
>> +pe_op.op = VFIO_EEH_PE_INJECT_ERR;
>> +pe_op.err.type = VFIO_EEH_ERR_TYPE_32;
>> +pe_op.err.func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR;
>> +pe_op.err.addr = 0ul;
>> +pe_op.err.mask = 0ul;
>> +ioctl(container, VFIO_EEH_PE_OP, &pe_op);
>> +
>> +
>> +
>>  /* When 0xFF's returned from reading PCI config space or IO BARs
>>   * of the PCI device. Check the PE's state to see if that has been
>>   * frozen.
>> diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
>> index 5fa42db..38edeb4 100644
>> --- a/drivers/vfio/vfio_spapr_eeh.c
>> +++ b/drivers/vfio/vfio_spapr_eeh.c
>> @@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
>>  case VFIO_EEH_PE_CONFIGURE:
>>  ret = eeh_pe_configure(pe);
>>  break;
>> +case VFIO_EEH_PE_INJECT_ERR:
>> +minsz = offsetofend(struct vfio_eeh_pe_op, err.mask);
>> +if (op.argsz < minsz)
>> +return -EINVAL;
>> +if (copy_from_user(&op, (void __user *)arg, minsz))
>> +return -EFAULT;
>> +
>> +ret = eeh_pe_inject_err(pe, op.err.type, op.err.func,
>> +op.err.addr, op.err.mask);
>> +break;
>>  default:
>>  ret = -EINVAL;
>>  }
>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>> index 82889c3..f68e962 100644
>> --- a/include/uapi/linux/vfio.h
>> +++ b/include/uapi/linux/vfio.h
>> @@ -468,12 +468,23 @@ struct vfio_iommu_spapr_tce_info {
>>   * - unfreeze IO/DMA for frozen PE;
>>   * - read PE state;
>>   * - reset PE;
>> - * - configure PE.
>> + * - configure PE;
>> + * - inject EEH error.
>>   */
>> +struct vfio_eeh_pe_err {
>> +__u32 type;
>> +__u32 func;
>> +__u64 addr;
>> +__u64 mask;
>> +};
>> +
>>  struct vfio_eeh_pe_op {
>>  __u32 argsz;
>>  __u32 flags;
>>  __u32 op;
>> +union {
>> +struct vfio_eeh_pe_err err;
>> +};
>>  };
>>  
>>  #define VFIO_EEH_PE_DISABLE 0   /* Disable EEH functionality */
>> @@ -490,6 +501,29 @@ struct vfio_eeh_pe_op {
>>  #define VFIO_EEH_PE_RESET_HOT   6   /* Assert hot reset 
>>  */
>>  #define VFIO_EEH_PE_RESET_FUNDAMENTAL   7   /* Assert fundamental 
>> reset  */
>>  #define VFIO_EEH_PE_CONFIGURE   8   /* PE configuration 
>>  */
>> +#define VFIO_EEH_PE_INJECT_ERR  9   /* Inject EEH error 
>>  */
>> +#define  VFIO_EEH_ERR_TYPE_32   0   /* 32-bits EEH error 
>> type*/
>> +#define  VFIO_EEH_ERR_TYPE_64   1   /* 64-bits EEH error 
>> type*/
>> +#define  VFIO_EEH_ERR_FUNC_LD_MEM_ADDR  0   /* Memory load  
>> */
>> +#define  VFIO_EEH_ERR_FUNC_LD_MEM_DATA  1
>> +#define  VFIO_EEH_ERR_FUNC_LD_IO_ADDR   2   /* IO load  
>> */
>> +#define  VFIO_EEH_ERR_FUNC_LD_IO_DATA   3
>> +#define  VFIO_EEH_ERR_FUNC_LD_CFG_ADDR  4   /* Config load  
>> */
>> +#define  VFIO_EEH_ERR_FUNC_LD_CFG_DATA  5
>> +#define  V

Re: [PATCH v3 2/2] drivers/vfio: Support EEH error injection

2015-03-22 Thread Gavin Shan
On Mon, Mar 23, 2015 at 02:43:03PM +1100, David Gibson wrote:
>On Mon, Mar 23, 2015 at 12:56:36PM +1100, Gavin Shan wrote:
>> On Mon, Mar 23, 2015 at 12:39:45PM +1100, David Gibson wrote:
>> >On Sat, Mar 21, 2015 at 06:58:45AM +1100, Gavin Shan wrote:
>> >> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
>> >> to inject the specified EEH error, which is represented by
>> >> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
>> >> 
>> >> Signed-off-by: Gavin Shan 
>> >
>> >Reviewed-by: David Gibson 
>> >
>> >> ---
>> >>  Documentation/vfio.txt| 12 
>> >>  drivers/vfio/vfio_spapr_eeh.c | 10 ++
>> >>  include/uapi/linux/vfio.h | 36 +++-
>> >>  3 files changed, 57 insertions(+), 1 deletion(-)
>> >> 
>> >> diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
>> >> index 96978ec..c6e11a3 100644
>> >> --- a/Documentation/vfio.txt
>> >> +++ b/Documentation/vfio.txt
>> >> @@ -385,6 +385,18 @@ The code flow from the example above should be 
>> >> slightly changed:
>> >>  
>> >>   
>> >>  
>> >> + /* Inject EEH error, which is expected to be caused by 32-bits
>> >> +  * config load.
>> >> +  */
>> >> + pe_op.op = VFIO_EEH_PE_INJECT_ERR;
>> >> + pe_op.err.type = VFIO_EEH_ERR_TYPE_32;
>> >> + pe_op.err.func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR;
>> >> + pe_op.err.addr = 0ul;
>> >> + pe_op.err.mask = 0ul;
>> >> + ioctl(container, VFIO_EEH_PE_OP, &pe_op);
>> >> +
>> >> + 
>> >> +
>> >>   /* When 0xFF's returned from reading PCI config space or IO BARs
>> >>* of the PCI device. Check the PE's state to see if that has been
>> >>* frozen.
>> >> diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
>> >> index 5fa42db..38edeb4 100644
>> >> --- a/drivers/vfio/vfio_spapr_eeh.c
>> >> +++ b/drivers/vfio/vfio_spapr_eeh.c
>> >> @@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group 
>> >> *group,
>> >>   case VFIO_EEH_PE_CONFIGURE:
>> >>   ret = eeh_pe_configure(pe);
>> >>   break;
>> >> + case VFIO_EEH_PE_INJECT_ERR:
>> >> + minsz = offsetofend(struct vfio_eeh_pe_op, err.mask);
>> >> + if (op.argsz < minsz)
>> >> + return -EINVAL;
>> >> + if (copy_from_user(&op, (void __user *)arg, minsz))
>> >> + return -EFAULT;
>> >> +
>> >> + ret = eeh_pe_inject_err(pe, op.err.type, op.err.func,
>> >> + op.err.addr, op.err.mask);
>> >> + break;
>> >>   default:
>> >>   ret = -EINVAL;
>> >>   }
>> >> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>> >> index 82889c3..f68e962 100644
>> >> --- a/include/uapi/linux/vfio.h
>> >> +++ b/include/uapi/linux/vfio.h
>> >> @@ -468,12 +468,23 @@ struct vfio_iommu_spapr_tce_info {
>> >>   * - unfreeze IO/DMA for frozen PE;
>> >>   * - read PE state;
>> >>   * - reset PE;
>> >> - * - configure PE.
>> >> + * - configure PE;
>> >> + * - inject EEH error.
>> >>   */
>> >> +struct vfio_eeh_pe_err {
>> >> + __u32 type;
>> >> + __u32 func;
>> >> + __u64 addr;
>> >> + __u64 mask;
>> >> +};
>> >> +
>> >>  struct vfio_eeh_pe_op {
>> >>   __u32 argsz;
>> >>   __u32 flags;
>> >>   __u32 op;
>> >> + union {
>> >> + struct vfio_eeh_pe_err err;
>> >> + };
>> >>  };
>> >>  
>> >>  #define VFIO_EEH_PE_DISABLE  0   /* Disable EEH 
>> >> functionality */
>> >> @@ -490,6 +501,29 @@ struct vfio_eeh_pe_op {
>> >>  #define VFIO_EEH_PE_RESET_HOT6   /* Assert hot reset 
>> >>  */
>> >>  #define VFIO_EEH_PE_RESET_FUNDAMENTAL7   /* Assert fundamental 
>> >> reset  */
>> >>  #define VF

Re: [PATCH v3 2/2] drivers/vfio: Support EEH error injection

2015-03-22 Thread Gavin Shan
On Mon, Mar 23, 2015 at 04:10:20PM +1100, David Gibson wrote:
>On Mon, Mar 23, 2015 at 04:03:59PM +1100, Gavin Shan wrote:
>> On Mon, Mar 23, 2015 at 02:43:03PM +1100, David Gibson wrote:
>> >On Mon, Mar 23, 2015 at 12:56:36PM +1100, Gavin Shan wrote:
>> >> On Mon, Mar 23, 2015 at 12:39:45PM +1100, David Gibson wrote:
>> >> >On Sat, Mar 21, 2015 at 06:58:45AM +1100, Gavin Shan wrote:
>> >> >> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
>> >> >> to inject the specified EEH error, which is represented by
>> >> >> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
>> >> >> 
>> >> >> Signed-off-by: Gavin Shan 
>> >> >
>> >> >Reviewed-by: David Gibson 
>> >> >
>> >> >> ---
>> >> >>  Documentation/vfio.txt| 12 
>> >> >>  drivers/vfio/vfio_spapr_eeh.c | 10 ++
>> >> >>  include/uapi/linux/vfio.h | 36 
>> >> >> +++-
>> >> >>  3 files changed, 57 insertions(+), 1 deletion(-)
>> >> >> 
>> >> >> diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
>> >> >> index 96978ec..c6e11a3 100644
>> >> >> --- a/Documentation/vfio.txt
>> >> >> +++ b/Documentation/vfio.txt
>> >> >> @@ -385,6 +385,18 @@ The code flow from the example above should be 
>> >> >> slightly changed:
>> >> >>  
>> >> >>
>> >> >>  
>> >> >> +  /* Inject EEH error, which is expected to be caused by 32-bits
>> >> >> +   * config load.
>> >> >> +   */
>> >> >> +  pe_op.op = VFIO_EEH_PE_INJECT_ERR;
>> >> >> +  pe_op.err.type = VFIO_EEH_ERR_TYPE_32;
>> >> >> +  pe_op.err.func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR;
>> >> >> +  pe_op.err.addr = 0ul;
>> >> >> +  pe_op.err.mask = 0ul;
>> >> >> +  ioctl(container, VFIO_EEH_PE_OP, &pe_op);
>> >> >> +
>> >> >> +  
>> >> >> +
>> >> >>/* When 0xFF's returned from reading PCI config space or IO BARs
>> >> >> * of the PCI device. Check the PE's state to see if that has 
>> >> >> been
>> >> >> * frozen.
>> >> >> diff --git a/drivers/vfio/vfio_spapr_eeh.c 
>> >> >> b/drivers/vfio/vfio_spapr_eeh.c
>> >> >> index 5fa42db..38edeb4 100644
>> >> >> --- a/drivers/vfio/vfio_spapr_eeh.c
>> >> >> +++ b/drivers/vfio/vfio_spapr_eeh.c
>> >> >> @@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group 
>> >> >> *group,
>> >> >>case VFIO_EEH_PE_CONFIGURE:
>> >> >>ret = eeh_pe_configure(pe);
>> >> >>break;
>> >> >> +  case VFIO_EEH_PE_INJECT_ERR:
>> >> >> +  minsz = offsetofend(struct vfio_eeh_pe_op, 
>> >> >> err.mask);
>> >> >> +  if (op.argsz < minsz)
>> >> >> +  return -EINVAL;
>> >> >> +  if (copy_from_user(&op, (void __user *)arg, 
>> >> >> minsz))
>> >> >> +  return -EFAULT;
>> >> >> +
>> >> >> +  ret = eeh_pe_inject_err(pe, op.err.type, 
>> >> >> op.err.func,
>> >> >> +  op.err.addr, 
>> >> >> op.err.mask);
>> >> >> +  break;
>> >> >>default:
>> >> >>ret = -EINVAL;
>> >> >>}
>> >> >> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>> >> >> index 82889c3..f68e962 100644
>> >> >> --- a/include/uapi/linux/vfio.h
>> >> >> +++ b/include/uapi/linux/vfio.h
>> >> >> @@ -468,12 +468,23 @@ struct vfio_iommu_spapr_tce_info {
>> >> >>   * - unfreeze IO/DMA for frozen PE;
>> >> >>   * - read PE state;
>> >&g

Re: [PATCH v3 2/2] drivers/vfio: Support EEH error injection

2015-03-23 Thread Gavin Shan
On Mon, Mar 23, 2015 at 10:14:59AM -0600, Alex Williamson wrote:
>On Mon, 2015-03-23 at 16:20 +1100, Gavin Shan wrote:
>> On Mon, Mar 23, 2015 at 04:10:20PM +1100, David Gibson wrote:
>> >On Mon, Mar 23, 2015 at 04:03:59PM +1100, Gavin Shan wrote:
>> >> On Mon, Mar 23, 2015 at 02:43:03PM +1100, David Gibson wrote:
>> >> >On Mon, Mar 23, 2015 at 12:56:36PM +1100, Gavin Shan wrote:
>> >> >> On Mon, Mar 23, 2015 at 12:39:45PM +1100, David Gibson wrote:
>> >> >> >On Sat, Mar 21, 2015 at 06:58:45AM +1100, Gavin Shan wrote:
>> >> >> >> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
>> >> >> >> to inject the specified EEH error, which is represented by
>> >> >> >> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
>> >> >> >> 
>> >> >> >> Signed-off-by: Gavin Shan 
>> >> >> >
>> >> >> >Reviewed-by: David Gibson 
>> >> >> >
>> >> >> >> ---
>> >> >> >>  Documentation/vfio.txt| 12 
>> >> >> >>  drivers/vfio/vfio_spapr_eeh.c | 10 ++
>> >> >> >>  include/uapi/linux/vfio.h | 36 
>> >> >> >> +++-
>> >> >> >>  3 files changed, 57 insertions(+), 1 deletion(-)
>> >> >> >> 
>> >> >> >> diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
>> >> >> >> index 96978ec..c6e11a3 100644
>> >> >> >> --- a/Documentation/vfio.txt
>> >> >> >> +++ b/Documentation/vfio.txt
>> >> >> >> @@ -385,6 +385,18 @@ The code flow from the example above should be 
>> >> >> >> slightly changed:
>> >> >> >>  
>> >> >> >> 
>> >> >> >>  
>> >> >> >> +   /* Inject EEH error, which is expected to be caused by 32-bits
>> >> >> >> +* config load.
>> >> >> >> +*/
>> >> >> >> +   pe_op.op = VFIO_EEH_PE_INJECT_ERR;
>> >> >> >> +   pe_op.err.type = VFIO_EEH_ERR_TYPE_32;
>> >> >> >> +   pe_op.err.func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR;
>> >> >> >> +   pe_op.err.addr = 0ul;
>> >> >> >> +   pe_op.err.mask = 0ul;
>> >> >> >> +   ioctl(container, VFIO_EEH_PE_OP, &pe_op);
>> >> >> >> +
>> >> >> >> +   
>> >> >> >> +
>> >> >> >> /* When 0xFF's returned from reading PCI config space or IO BARs
>> >> >> >>  * of the PCI device. Check the PE's state to see if that has 
>> >> >> >> been
>> >> >> >>  * frozen.
>> >> >> >> diff --git a/drivers/vfio/vfio_spapr_eeh.c 
>> >> >> >> b/drivers/vfio/vfio_spapr_eeh.c
>> >> >> >> index 5fa42db..38edeb4 100644
>> >> >> >> --- a/drivers/vfio/vfio_spapr_eeh.c
>> >> >> >> +++ b/drivers/vfio/vfio_spapr_eeh.c
>> >> >> >> @@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct 
>> >> >> >> iommu_group *group,
>> >> >> >> case VFIO_EEH_PE_CONFIGURE:
>> >> >> >> ret = eeh_pe_configure(pe);
>> >> >> >> break;
>> >> >> >> +   case VFIO_EEH_PE_INJECT_ERR:
>> >> >> >> +   minsz = offsetofend(struct vfio_eeh_pe_op, 
>> >> >> >> err.mask);
>> >> >> >> +   if (op.argsz < minsz)
>> >> >> >> +   return -EINVAL;
>> >> >> >> +   if (copy_from_user(&op, (void __user *)arg, 
>> >> >> >> minsz))
>> >> >> >> +   return -EFAULT;
>> >> >> >> +
>> >> >> >> +   ret = eeh_pe_inject_err(pe, op.err.type, 
>> >> >> >> op.err.func,
>> >> >> >> +   op.err.addr, 
>> >> >> >> op.err.mask);
>> >> >> >> +   b

Re: [PATCH v3 2/2] drivers/vfio: Support EEH error injection

2015-03-23 Thread Gavin Shan
On Tue, Mar 24, 2015 at 12:22:25PM +1100, David Gibson wrote:
>On Tue, Mar 24, 2015 at 09:47:54AM +1100, Gavin Shan wrote:
>> On Mon, Mar 23, 2015 at 10:14:59AM -0600, Alex Williamson wrote:
>> >On Mon, 2015-03-23 at 16:20 +1100, Gavin Shan wrote:
>> >> On Mon, Mar 23, 2015 at 04:10:20PM +1100, David Gibson wrote:
>> >> >On Mon, Mar 23, 2015 at 04:03:59PM +1100, Gavin Shan wrote:
>> >> >> On Mon, Mar 23, 2015 at 02:43:03PM +1100, David Gibson wrote:
>> >> >> >On Mon, Mar 23, 2015 at 12:56:36PM +1100, Gavin Shan wrote:
>> >> >> >> On Mon, Mar 23, 2015 at 12:39:45PM +1100, David Gibson wrote:
>> >> >> >> >On Sat, Mar 21, 2015 at 06:58:45AM +1100, Gavin Shan wrote:
>> >> >> >> >> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
>> >> >> >> >> to inject the specified EEH error, which is represented by
>> >> >> >> >> (struct vfio_eeh_pe_err), to the indicated PE for testing 
>> >> >> >> >> purpose.
>> >> >> >> >> 
>> >> >> >> >> Signed-off-by: Gavin Shan 
>> >> >> >> >
>> >> >> >> >Reviewed-by: David Gibson 
>> >> >> >> >
>> >> >> >> >> ---
>> >> >> >> >>  Documentation/vfio.txt| 12 
>> >> >> >> >>  drivers/vfio/vfio_spapr_eeh.c | 10 ++
>> >> >> >> >>  include/uapi/linux/vfio.h | 36 
>> >> >> >> >> +++-
>> >> >> >> >>  3 files changed, 57 insertions(+), 1 deletion(-)
>> >> >> >> >> 
>> >> >> >> >> diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
>> >> >> >> >> index 96978ec..c6e11a3 100644
>> >> >> >> >> --- a/Documentation/vfio.txt
>> >> >> >> >> +++ b/Documentation/vfio.txt
>> >> >> >> >> @@ -385,6 +385,18 @@ The code flow from the example above should 
>> >> >> >> >> be slightly changed:
>> >> >> >> >>  
>> >> >> >> >>  
>> >> >> >> >>  
>> >> >> >> >> +/* Inject EEH error, which is expected to be caused by 
>> >> >> >> >> 32-bits
>> >> >> >> >> + * config load.
>> >> >> >> >> + */
>> >> >> >> >> +pe_op.op = VFIO_EEH_PE_INJECT_ERR;
>> >> >> >> >> +pe_op.err.type = VFIO_EEH_ERR_TYPE_32;
>> >> >> >> >> +pe_op.err.func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR;
>> >> >> >> >> +pe_op.err.addr = 0ul;
>> >> >> >> >> +pe_op.err.mask = 0ul;
>> >> >> >> >> +ioctl(container, VFIO_EEH_PE_OP, &pe_op);
>> >> >> >> >> +
>> >> >> >> >> +
>> >> >> >> >> +
>> >> >> >> >>  /* When 0xFF's returned from reading PCI config space 
>> >> >> >> >> or IO BARs
>> >> >> >> >>   * of the PCI device. Check the PE's state to see if 
>> >> >> >> >> that has been
>> >> >> >> >>   * frozen.
>> >> >> >> >> diff --git a/drivers/vfio/vfio_spapr_eeh.c 
>> >> >> >> >> b/drivers/vfio/vfio_spapr_eeh.c
>> >> >> >> >> index 5fa42db..38edeb4 100644
>> >> >> >> >> --- a/drivers/vfio/vfio_spapr_eeh.c
>> >> >> >> >> +++ b/drivers/vfio/vfio_spapr_eeh.c
>> >> >> >> >> @@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct 
>> >> >> >> >> iommu_group *group,
>> >> >> >> >>  case VFIO_EEH_PE_CONFIGURE:
>> >> >> >> >>  ret = eeh_pe_configure(pe);
>> >> >> >> >>  break;
>> >> >> >> >> +case VFIO_EEH_PE_INJECT_ERR:
>> >&

[PATCH v4 0/4] EEH Error Injection Support for VFIO Devices

2015-03-25 Thread Gavin Shan
This series of patches is an extension to the EEH support for VFIO PCI devices,
which allows EEH errors to be injected into VFIO PCI devices from userspace
for testing purposes.

Changelog
=
v3 -> v4:
* Move constants for EEH PE states defined in uapi/linux/vfio.h
  to uapi/asm/eeh.h.
v2 -> v3:
* Use offsetofend(), instead of sizeof(struct vfio_eeh_pe_op)
  to calculate argument buffer size.
v1 -> v2:
* Use EEH_ERR_FUNC_{MIN,MAX} to validate PCI error function.
* Put additional arguments for error injection to union in
  struct vfio_eeh_pe_op.

Gavin Shan (4):
  powerpc/eeh: Eliminate duplicated PE states
  powerpc/eeh: Introduce eeh_pe_inject_err()
  drivers/vfio: Support EEH error injection
  drivers/vfio: Remove duplicated PE states

 Documentation/vfio.txt  | 12 
 arch/powerpc/include/asm/eeh.h  |  9 +++---
 arch/powerpc/include/uapi/asm/eeh.h | 56 +
 arch/powerpc/kernel/eeh.c   | 35 +++
 drivers/vfio/vfio_spapr_eeh.c   | 10 +++
 include/uapi/linux/vfio.h   | 19 +
 6 files changed, 130 insertions(+), 11 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/eeh.h

-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 2/4] powerpc/eeh: Introduce eeh_pe_inject_err()

2015-03-25 Thread Gavin Shan
The patch defines PCI error types and functions in uapi/asm/eeh.h
and exports function eeh_pe_inject_err(), which will be called by
VFIO driver to inject the specified PCI error to the indicated
PE for testing purpose.

Signed-off-by: Gavin Shan 
Reviewed-by: David Gibson 
---
 arch/powerpc/include/asm/eeh.h  |  2 ++
 arch/powerpc/include/uapi/asm/eeh.h | 26 ++
 arch/powerpc/kernel/eeh.c   | 35 +++
 3 files changed, 63 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 08c4042..cd6003b 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -291,6 +291,8 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option);
 int eeh_pe_get_state(struct eeh_pe *pe);
 int eeh_pe_reset(struct eeh_pe *pe, int option);
 int eeh_pe_configure(struct eeh_pe *pe);
+int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
+ unsigned long addr, unsigned long mask);
 
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
diff --git a/arch/powerpc/include/uapi/asm/eeh.h 
b/arch/powerpc/include/uapi/asm/eeh.h
index 8bb34b0..291b7d1 100644
--- a/arch/powerpc/include/uapi/asm/eeh.h
+++ b/arch/powerpc/include/uapi/asm/eeh.h
@@ -27,4 +27,30 @@
 #define EEH_PE_STATE_STOPPED_DMA   4   /* Stopped DMA only */
 #define EEH_PE_STATE_UNAVAIL   5   /* Unavailable  */
 
+/* EEH error types and functions */
+#define EEH_ERR_TYPE_32            0   /* 32-bits error   */
+#define EEH_ERR_TYPE_64            1   /* 64-bits error   */
+#define EEH_ERR_FUNC_MIN   0
+#define EEH_ERR_FUNC_LD_MEM_ADDR   0   /* Memory load  */
+#define EEH_ERR_FUNC_LD_MEM_DATA   1
+#define EEH_ERR_FUNC_LD_IO_ADDR    2   /* IO load  */
+#define EEH_ERR_FUNC_LD_IO_DATA    3
+#define EEH_ERR_FUNC_LD_CFG_ADDR   4   /* Config load  */
+#define EEH_ERR_FUNC_LD_CFG_DATA   5
+#define EEH_ERR_FUNC_ST_MEM_ADDR   6   /* Memory store */
+#define EEH_ERR_FUNC_ST_MEM_DATA   7
+#define EEH_ERR_FUNC_ST_IO_ADDR    8   /* IO store */
+#define EEH_ERR_FUNC_ST_IO_DATA    9
+#define EEH_ERR_FUNC_ST_CFG_ADDR   10  /* Config store */
+#define EEH_ERR_FUNC_ST_CFG_DATA   11
+#define EEH_ERR_FUNC_DMA_RD_ADDR   12  /* DMA read */
+#define EEH_ERR_FUNC_DMA_RD_DATA   13
+#define EEH_ERR_FUNC_DMA_RD_MASTER 14
+#define EEH_ERR_FUNC_DMA_RD_TARGET 15
+#define EEH_ERR_FUNC_DMA_WR_ADDR   16  /* DMA write*/
+#define EEH_ERR_FUNC_DMA_WR_DATA   17
+#define EEH_ERR_FUNC_DMA_WR_MASTER 18
+#define EEH_ERR_FUNC_DMA_WR_TARGET 19
+#define EEH_ERR_FUNC_MAX   19
+
 #endif /* _ASM_POWERPC_EEH_H */
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 76253eb..daa68a1 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1636,6 +1636,41 @@ int eeh_pe_configure(struct eeh_pe *pe)
 }
 EXPORT_SYMBOL_GPL(eeh_pe_configure);
 
+/**
+ * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE
+ * @pe: the indicated PE
+ * @type: error type
+ * @func: error function
+ * @addr: address
+ * @mask: address mask
+ *
+ * The routine is called to inject the specified PCI error, which
+ * is determined by @type and @func, to the indicated PE for
+ * testing purposes.
+ */
+int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
+ unsigned long addr, unsigned long mask)
+{
+   /* Invalid PE ? */
+   if (!pe)
+   return -ENODEV;
+
+   /* Unsupported operation ? */
+   if (!eeh_ops || !eeh_ops->err_inject)
+   return -ENOENT;
+
+   /* Check on PCI error type */
+   if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
+   return -EINVAL;
+
+   /* Check on PCI error function */
+   if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
+   return -EINVAL;
+
+   return eeh_ops->err_inject(pe, type, func, addr, mask);
+}
+EXPORT_SYMBOL_GPL(eeh_pe_inject_err);
+
 static int proc_eeh_show(struct seq_file *m, void *v)
 {
if (!eeh_enabled()) {
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 4/4] drivers/vfio: Remove duplicated PE states

2015-03-25 Thread Gavin Shan
The set of constants for PE states defined in uapi/linux/vfio.h is
duplicated to uapi/asm/eeh.h. The patch removes the set from the
former.

Signed-off-by: Gavin Shan 
---
 include/uapi/linux/vfio.h | 5 -
 1 file changed, 5 deletions(-)

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index d81c17f..3fd1e86 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -492,11 +492,6 @@ struct vfio_eeh_pe_op {
 #define VFIO_EEH_PE_UNFREEZE_IO    2   /* Enable IO for frozen PE   */
 #define VFIO_EEH_PE_UNFREEZE_DMA   3   /* Enable DMA for frozen PE  */
 #define VFIO_EEH_PE_GET_STATE  4   /* PE state retrieval*/
-#define  VFIO_EEH_PE_STATE_NORMAL  0   /* PE in functional state*/
-#define  VFIO_EEH_PE_STATE_RESET   1   /* PE reset in progress  */
-#define  VFIO_EEH_PE_STATE_STOPPED 2   /* Stopped DMA and IO*/
-#define  VFIO_EEH_PE_STATE_STOPPED_DMA 4   /* Stopped DMA only  */
-#define  VFIO_EEH_PE_STATE_UNAVAIL 5   /* State unavailable */
 #define VFIO_EEH_PE_RESET_DEACTIVATE   5   /* Deassert PE reset */
 #define VFIO_EEH_PE_RESET_HOT  6   /* Assert hot reset  */
 #define VFIO_EEH_PE_RESET_FUNDAMENTAL  7   /* Assert fundamental reset  */
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 1/4] powerpc/eeh: Eliminate duplicated PE states

2015-03-25 Thread Gavin Shan
There are two equivalent sets of constants for PE states, defined
in arch/powerpc/include/asm/eeh.h and include/uapi/linux/vfio.h.
The former is used by EEH core and the latter is used by userspace.
The patch moves those constants from arch/powerpc/include/asm/eeh.h
to arch/powerpc/include/uapi/asm/eeh.h to avoid maintaining two
equivalent sets of constants.

Suggested-by: David Gibson 
Signed-off-by: Gavin Shan 
---
 arch/powerpc/include/asm/eeh.h  |  7 ++-
 arch/powerpc/include/uapi/asm/eeh.h | 30 ++
 2 files changed, 32 insertions(+), 5 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/eeh.h

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index d777c0c..08c4042 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -27,6 +27,8 @@
 #include 
 #include 
 
+#include <uapi/asm/eeh.h>
+
 struct pci_dev;
 struct pci_bus;
 
@@ -184,11 +186,6 @@ enum {
 #define EEH_STATE_DMA_ACTIVE   (1 << 4)/* Active DMA   */
 #define EEH_STATE_MMIO_ENABLED (1 << 5)/* MMIO enabled */
 #define EEH_STATE_DMA_ENABLED  (1 << 6)/* DMA enabled  */
-#define EEH_PE_STATE_NORMAL    0   /* Normal state */
-#define EEH_PE_STATE_RESET 1   /* PE reset asserted*/
-#define EEH_PE_STATE_STOPPED_IO_DMA    2   /* Frozen PE*/
-#define EEH_PE_STATE_STOPPED_DMA   4   /* Stopped DMA, Enabled IO */
-#define EEH_PE_STATE_UNAVAIL   5   /* Unavailable  */
 #define EEH_RESET_DEACTIVATE   0   /* Deactivate the PE reset  */
 #define EEH_RESET_HOT  1   /* Hot reset*/
 #define EEH_RESET_FUNDAMENTAL  3   /* Fundamental reset*/
diff --git a/arch/powerpc/include/uapi/asm/eeh.h 
b/arch/powerpc/include/uapi/asm/eeh.h
new file mode 100644
index 000..8bb34b0
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/eeh.h
@@ -0,0 +1,30 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2015
+ *
+ * Authors: Gavin Shan 
+ */
+
+#ifndef _ASM_POWERPC_EEH_H
+#define _ASM_POWERPC_EEH_H
+
+/* PE states */
+#define EEH_PE_STATE_NORMAL    0   /* Normal state */
+#define EEH_PE_STATE_RESET 1   /* PE reset asserted*/
+#define EEH_PE_STATE_STOPPED_IO_DMA    2   /* Frozen PE*/
+#define EEH_PE_STATE_STOPPED_DMA   4   /* Stopped DMA only */
+#define EEH_PE_STATE_UNAVAIL   5   /* Unavailable  */
+
+#endif /* _ASM_POWERPC_EEH_H */
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4 3/4] drivers/vfio: Support EEH error injection

2015-03-25 Thread Gavin Shan
The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
to inject the specified EEH error, which is represented by
(struct vfio_eeh_pe_err), to the indicated PE for testing purpose.

Signed-off-by: Gavin Shan 
Reviewed-by: David Gibson 
---
 Documentation/vfio.txt| 12 
 drivers/vfio/vfio_spapr_eeh.c | 10 ++
 include/uapi/linux/vfio.h | 14 +-
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index 96978ec..4c746a7 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -385,6 +385,18 @@ The code flow from the example above should be slightly 
changed:
 

 
+   /* Inject EEH error, which is expected to be caused by 32-bits
+* config load.
+*/
+   pe_op.op = VFIO_EEH_PE_INJECT_ERR;
+   pe_op.err.type = EEH_ERR_TYPE_32;
+   pe_op.err.func = EEH_ERR_FUNC_LD_CFG_ADDR;
+   pe_op.err.addr = 0ul;
+   pe_op.err.mask = 0ul;
+   ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+   
+
/* When 0xFF's returned from reading PCI config space or IO BARs
 * of the PCI device. Check the PE's state to see if that has been
 * frozen.
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 5fa42db..38edeb4 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
case VFIO_EEH_PE_CONFIGURE:
ret = eeh_pe_configure(pe);
break;
+   case VFIO_EEH_PE_INJECT_ERR:
+   minsz = offsetofend(struct vfio_eeh_pe_op, err.mask);
+   if (op.argsz < minsz)
+   return -EINVAL;
+   if (copy_from_user(&op, (void __user *)arg, minsz))
+   return -EFAULT;
+
+   ret = eeh_pe_inject_err(pe, op.err.type, op.err.func,
+   op.err.addr, op.err.mask);
+   break;
default:
ret = -EINVAL;
}
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 82889c3..d81c17f 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -468,12 +468,23 @@ struct vfio_iommu_spapr_tce_info {
  * - unfreeze IO/DMA for frozen PE;
  * - read PE state;
  * - reset PE;
- * - configure PE.
+ * - configure PE;
+ * - inject EEH error.
  */
+struct vfio_eeh_pe_err {
+   __u32 type;
+   __u32 func;
+   __u64 addr;
+   __u64 mask;
+};
+
 struct vfio_eeh_pe_op {
__u32 argsz;
__u32 flags;
__u32 op;
+   union {
+   struct vfio_eeh_pe_err err;
+   };
 };
 
 #define VFIO_EEH_PE_DISABLE    0   /* Disable EEH functionality */
@@ -490,6 +501,7 @@ struct vfio_eeh_pe_op {
 #define VFIO_EEH_PE_RESET_HOT  6   /* Assert hot reset  */
 #define VFIO_EEH_PE_RESET_FUNDAMENTAL  7   /* Assert fundamental reset  */
 #define VFIO_EEH_PE_CONFIGURE  8   /* PE configuration  */
+#define VFIO_EEH_PE_INJECT_ERR 9   /* Inject EEH error  */
 
 #define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21)
 
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 4/4] drivers/vfio: Remove duplicated PE states

2015-03-25 Thread Gavin Shan
On Wed, Mar 25, 2015 at 06:46:28PM -0600, Alex Williamson wrote:
>On Thu, 2015-03-26 at 10:20 +1100, Gavin Shan wrote:
>> The set of constants for PE states defined in uapi/linux/vfio.h is
>> duplicated to uapi/asm/eeh.h. The patch removes the set from the
>> former.
>> 
>> Signed-off-by: Gavin Shan 
>> ---
>>  include/uapi/linux/vfio.h | 5 -
>>  1 file changed, 5 deletions(-)
>> 
>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>> index d81c17f..3fd1e86 100644
>> --- a/include/uapi/linux/vfio.h
>> +++ b/include/uapi/linux/vfio.h
>> @@ -492,11 +492,6 @@ struct vfio_eeh_pe_op {
>>  #define VFIO_EEH_PE_UNFREEZE_IO 2   /* Enable IO for frozen 
>> PE   */
>>  #define VFIO_EEH_PE_UNFREEZE_DMA3   /* Enable DMA for frozen PE  */
>>  #define VFIO_EEH_PE_GET_STATE   4   /* PE state retrieval   
>>  */
>> -#define  VFIO_EEH_PE_STATE_NORMAL   0   /* PE in functional state*/
>> -#define  VFIO_EEH_PE_STATE_RESET1   /* PE reset in progress  */
>> -#define  VFIO_EEH_PE_STATE_STOPPED  2   /* Stopped DMA and IO*/
>> -#define  VFIO_EEH_PE_STATE_STOPPED_DMA  4   /* Stopped DMA only 
>>  */
>> -#define  VFIO_EEH_PE_STATE_UNAVAIL  5   /* State unavailable */
>>  #define VFIO_EEH_PE_RESET_DEACTIVATE5   /* Deassert PE reset
>>  */
>>  #define VFIO_EEH_PE_RESET_HOT   6   /* Assert hot reset 
>>  */
>>  #define VFIO_EEH_PE_RESET_FUNDAMENTAL   7   /* Assert fundamental 
>> reset  */
>
>How do you know that nobody depends on these defines?  I thought the
>suggestion was to use the EEH_* defines for error injection, not to
>remove existing VFIO_EEH_* defines.  You could certainly redefine these
>in terms of EEH_* defines instead.  Thanks,
>

QEMU should be the first user to utilize the EEH capability exposed by
the host kernel, and I believe QEMU doesn't use those constants yet.
So it's the right time to move those constants to uapi/asm/eeh.h. Once
someone starts to use them, it will be impossible to do so.

Thanks,
Gavin

>Alex
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 4/4] drivers/vfio: Remove duplicated PE states

2015-03-25 Thread Gavin Shan
On Thu, Mar 26, 2015 at 12:01:57PM +1100, David Gibson wrote:
>On Wed, Mar 25, 2015 at 06:46:28PM -0600, Alex Williamson wrote:
>> On Thu, 2015-03-26 at 10:20 +1100, Gavin Shan wrote:
>> > The set of constants for PE states defined in uapi/linux/vfio.h is
>> > duplicated to uapi/asm/eeh.h. The patch removes the set from the
>> > former.
>> > 
>> > Signed-off-by: Gavin Shan 
>> > ---
>> >  include/uapi/linux/vfio.h | 5 -
>> >  1 file changed, 5 deletions(-)
>> > 
>> > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>> > index d81c17f..3fd1e86 100644
>> > --- a/include/uapi/linux/vfio.h
>> > +++ b/include/uapi/linux/vfio.h
>> > @@ -492,11 +492,6 @@ struct vfio_eeh_pe_op {
>> >  #define VFIO_EEH_PE_UNFREEZE_IO   2   /* Enable IO for frozen 
>> > PE   */
>> >  #define VFIO_EEH_PE_UNFREEZE_DMA  3   /* Enable DMA for frozen PE  */
>> >  #define VFIO_EEH_PE_GET_STATE 4   /* PE state retrieval   
>> >  */
>> > -#define  VFIO_EEH_PE_STATE_NORMAL 0   /* PE in functional state*/
>> > -#define  VFIO_EEH_PE_STATE_RESET  1   /* PE reset in progress  */
>> > -#define  VFIO_EEH_PE_STATE_STOPPED2   /* Stopped DMA and IO   
>> >  */
>> > -#define  VFIO_EEH_PE_STATE_STOPPED_DMA4   /* Stopped DMA only 
>> >  */
>> > -#define  VFIO_EEH_PE_STATE_UNAVAIL5   /* State unavailable
>> >  */
>> >  #define VFIO_EEH_PE_RESET_DEACTIVATE  5   /* Deassert PE reset
>> >  */
>> >  #define VFIO_EEH_PE_RESET_HOT 6   /* Assert hot reset 
>> >  */
>> >  #define VFIO_EEH_PE_RESET_FUNDAMENTAL 7   /* Assert fundamental 
>> > reset  */
>> 
>> How do you know that nobody depends on these defines?  I thought the
>> suggestion was to use the EEH_* defines for error injection, not to
>> remove existing VFIO_EEH_* defines.  You could certainly redefine these
>> in terms of EEH_* defines instead.  Thanks,
>
>Yeah, since they're already exported, these can't be just removed, but
>should be redefined in terms of the new exported EEH defines.
>

I just explained this to Alex.W as follows. Are you sure you want to
keep this set of defines in vfio.h? That way, the EEH error constants
are all defined in uapi/asm/eeh.h, but the EEH PE state constants will
be spread across vfio.h and uapi/asm/eeh.h at the same time. Actually,
I believe it's safe to move the PE state defines from vfio.h to
uapi/asm/eeh.h and now is the right time to do so :)

---

QEMU should be the first user to utilize the EEH capability exposed by
the host kernel, and I believe QEMU doesn't use those constants yet.
So it's the right time to move those constants to uapi/asm/eeh.h. Once
someone starts to use them, it will be impossible to do so.

>I also think this should be folded into 1/1.
>

The reason I didn't fold it into PATCH[1/4]: I was afraid the changes
would be taken via different trees (ppc-next and vfio-next).

Thanks,
Gavin

>-- 
>David Gibson   | I'll have my music baroque, and my code
>david AT gibson.dropbear.id.au | minimalist, thank you.  NOT _the_ _other_
>   | _way_ _around_!
>http://www.ozlabs.org/~dgibson


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 4/4] drivers/vfio: Remove duplicated PE states

2015-03-25 Thread Gavin Shan
On Wed, Mar 25, 2015 at 07:55:41PM -0600, Alex Williamson wrote:
>On Thu, 2015-03-26 at 11:59 +1100, Gavin Shan wrote:
>> On Wed, Mar 25, 2015 at 06:46:28PM -0600, Alex Williamson wrote:
>> >On Thu, 2015-03-26 at 10:20 +1100, Gavin Shan wrote:
>> >> The set of constants for PE states defined in uapi/linux/vfio.h is
>> >> duplicated to uapi/asm/eeh.h. The patch removes the set from the
>> >> former.
>> >> 
>> >> Signed-off-by: Gavin Shan 
>> >> ---
>> >>  include/uapi/linux/vfio.h | 5 -
>> >>  1 file changed, 5 deletions(-)
>> >> 
>> >> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>> >> index d81c17f..3fd1e86 100644
>> >> --- a/include/uapi/linux/vfio.h
>> >> +++ b/include/uapi/linux/vfio.h
>> >> @@ -492,11 +492,6 @@ struct vfio_eeh_pe_op {
>> >>  #define VFIO_EEH_PE_UNFREEZE_IO  2   /* Enable IO for frozen 
>> >> PE   */
>> >>  #define VFIO_EEH_PE_UNFREEZE_DMA 3   /* Enable DMA for frozen PE  */
>> >>  #define VFIO_EEH_PE_GET_STATE4   /* PE state retrieval   
>> >>  */
>> >> -#define  VFIO_EEH_PE_STATE_NORMAL0   /* PE in functional 
>> >> state*/
>> >> -#define  VFIO_EEH_PE_STATE_RESET 1   /* PE reset in progress  */
>> >> -#define  VFIO_EEH_PE_STATE_STOPPED   2   /* Stopped DMA and IO   
>> >>  */
>> >> -#define  VFIO_EEH_PE_STATE_STOPPED_DMA   4   /* Stopped DMA only 
>> >>  */
>> >> -#define  VFIO_EEH_PE_STATE_UNAVAIL   5   /* State unavailable
>> >>  */
>> >>  #define VFIO_EEH_PE_RESET_DEACTIVATE 5   /* Deassert PE reset
>> >>  */
>> >>  #define VFIO_EEH_PE_RESET_HOT6   /* Assert hot reset 
>> >>  */
>> >>  #define VFIO_EEH_PE_RESET_FUNDAMENTAL7   /* Assert fundamental 
>> >> reset  */
>> >
>> >How do you know that nobody depends on these defines?  I thought the
>> >suggestion was to use the EEH_* defines for error injection, not to
>> >remove existing VFIO_EEH_* defines.  You could certainly redefine these
>> >in terms of EEH_* defines instead.  Thanks,
>> >
>> 
>> QEMU should be the first user to utilize the EEH capability exposed by
>> the host kernel, and I believe QEMU doesn't use those constants yet.
>> So it's right time to move those constants to uapi/asm/eeh.h. Once some
>> one starts to use them, it's impossible to do so.
>
>There are soon to be four kernel versions out there with these defines,
>you can't be sure that nobody has already or won't in the future do
>VFIO/EEH development on those kernels.  The defines need to stay IMHO.
>Thanks,
>

Right, that's a good reason to keep them. I'll simply drop PATCH[4/4] and
amend the commit log of PATCH[1/4] in the next revision, which will be
sent out soon.

Thanks,
Gavin

>Alex
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 3/3] drivers/vfio: Support EEH error injection

2015-03-25 Thread Gavin Shan
The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
to inject the specified EEH error, which is represented by
(struct vfio_eeh_pe_err), to the indicated PE for testing purposes.

Signed-off-by: Gavin Shan 
Reviewed-by: David Gibson 
---
 Documentation/vfio.txt| 12 
 drivers/vfio/vfio_spapr_eeh.c | 10 ++
 include/uapi/linux/vfio.h | 14 +-
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index 96978ec..4c746a7 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -385,6 +385,18 @@ The code flow from the example above should be slightly 
changed:
 

 
+   /* Inject EEH error, which is expected to be caused by 32-bits
+* config load.
+*/
+   pe_op.op = VFIO_EEH_PE_INJECT_ERR;
+   pe_op.err.type = EEH_ERR_TYPE_32;
+   pe_op.err.func = EEH_ERR_FUNC_LD_CFG_ADDR;
+   pe_op.err.addr = 0ul;
+   pe_op.err.mask = 0ul;
+   ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+   
+
/* When 0xFF's returned from reading PCI config space or IO BARs
 * of the PCI device. Check the PE's state to see if that has been
 * frozen.
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 5fa42db..38edeb4 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
case VFIO_EEH_PE_CONFIGURE:
ret = eeh_pe_configure(pe);
break;
+   case VFIO_EEH_PE_INJECT_ERR:
+   minsz = offsetofend(struct vfio_eeh_pe_op, err.mask);
+   if (op.argsz < minsz)
+   return -EINVAL;
+   if (copy_from_user(&op, (void __user *)arg, minsz))
+   return -EFAULT;
+
+   ret = eeh_pe_inject_err(pe, op.err.type, op.err.func,
+   op.err.addr, op.err.mask);
+   break;
default:
ret = -EINVAL;
}
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 82889c3..d81c17f 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -468,12 +468,23 @@ struct vfio_iommu_spapr_tce_info {
  * - unfreeze IO/DMA for frozen PE;
  * - read PE state;
  * - reset PE;
- * - configure PE.
+ * - configure PE;
+ * - inject EEH error.
  */
+struct vfio_eeh_pe_err {
+   __u32 type;
+   __u32 func;
+   __u64 addr;
+   __u64 mask;
+};
+
 struct vfio_eeh_pe_op {
__u32 argsz;
__u32 flags;
__u32 op;
+   union {
+   struct vfio_eeh_pe_err err;
+   };
 };
 
 #define VFIO_EEH_PE_DISABLE0   /* Disable EEH functionality */
@@ -490,6 +501,7 @@ struct vfio_eeh_pe_op {
 #define VFIO_EEH_PE_RESET_HOT  6   /* Assert hot reset  */
 #define VFIO_EEH_PE_RESET_FUNDAMENTAL  7   /* Assert fundamental reset  */
 #define VFIO_EEH_PE_CONFIGURE  8   /* PE configuration  */
+#define VFIO_EEH_PE_INJECT_ERR 9   /* Inject EEH error  */
 
 #define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21)
 
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 1/3] powerpc/eeh: Move PE state constants around

2015-03-25 Thread Gavin Shan
There are two equivalent sets of PE state constants, defined in
arch/powerpc/include/asm/eeh.h and include/uapi/linux/vfio.h.
Though the names are different, their corresponding values are
exactly the same. The former is used by the EEH core and the latter is
used by userspace.

The patch moves those constants from arch/powerpc/include/asm/eeh.h
to arch/powerpc/include/uapi/asm/eeh.h, which are expected to be
used by userspace from now on. We can't delete those constants in
vfio.h as it's uncertain whether those constants have been or will be
used by userspace.

Suggested-by: David Gibson 
Signed-off-by: Gavin Shan 
---
 arch/powerpc/include/asm/eeh.h  |  7 ++-
 arch/powerpc/include/uapi/asm/eeh.h | 30 ++
 2 files changed, 32 insertions(+), 5 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/eeh.h

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index d777c0c..08c4042 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -27,6 +27,8 @@
 #include 
 #include 
 
+#include 
+
 struct pci_dev;
 struct pci_bus;
 
@@ -184,11 +186,6 @@ enum {
 #define EEH_STATE_DMA_ACTIVE   (1 << 4)/* Active DMA   */
 #define EEH_STATE_MMIO_ENABLED (1 << 5)/* MMIO enabled */
 #define EEH_STATE_DMA_ENABLED  (1 << 6)/* DMA enabled  */
-#define EEH_PE_STATE_NORMAL0   /* Normal state */
-#define EEH_PE_STATE_RESET 1   /* PE reset asserted*/
-#define EEH_PE_STATE_STOPPED_IO_DMA2   /* Frozen PE*/
-#define EEH_PE_STATE_STOPPED_DMA   4   /* Stopped DMA, Enabled IO */
-#define EEH_PE_STATE_UNAVAIL   5   /* Unavailable  */
 #define EEH_RESET_DEACTIVATE   0   /* Deactivate the PE reset  */
 #define EEH_RESET_HOT  1   /* Hot reset*/
 #define EEH_RESET_FUNDAMENTAL  3   /* Fundamental reset*/
diff --git a/arch/powerpc/include/uapi/asm/eeh.h 
b/arch/powerpc/include/uapi/asm/eeh.h
new file mode 100644
index 000..8bb34b0
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/eeh.h
@@ -0,0 +1,30 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2015
+ *
+ * Authors: Gavin Shan 
+ */
+
+#ifndef _ASM_POWERPC_EEH_H
+#define _ASM_POWERPC_EEH_H
+
+/* PE states */
+#define EEH_PE_STATE_NORMAL0   /* Normal state */
+#define EEH_PE_STATE_RESET 1   /* PE reset asserted*/
+#define EEH_PE_STATE_STOPPED_IO_DMA2   /* Frozen PE*/
+#define EEH_PE_STATE_STOPPED_DMA   4   /* Stopped DMA only */
+#define EEH_PE_STATE_UNAVAIL   5   /* Unavailable  */
+
+#endif /* _ASM_POWERPC_EEH_H */
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 0/3] EEH Error Injection Support for VFIO Devices

2015-03-25 Thread Gavin Shan
This series of patches is an extension to EEH support for VFIO PCI devices,
which allows injecting EEH errors into VFIO PCI devices from userspace
for testing purposes.

Changelog
=
v4 -> v5:
* Adjusted commit log for PATCH[1]
* Dropped the last patch, which deleted VFIO_EEH_PE_STATE_* from
  uapi/linux/vfio.h, because we're uncertain whether anyone has
  used or will use them in the future, as pointed out by Alex.W.
v3 -> v4:
* Move constants for EEH PE states defined in uapi/linux/vfio.h
  to uapi/asm/eeh.h.
v2 -> v3:
* Use offsetofend(), instead of sizeof(struct vfio_eeh_pe_op)
  to calculate argument buffer size.
v1 -> v2:
* Use EEH_ERR_FUNC_{MIN,MAX} to validate PCI error function.
* Put additional arguments for error injection to union in
  struct vfio_eeh_pe_op.

Gavin Shan (3):
  powerpc/eeh: Move PE state constants around
  powerpc/eeh: Introduce eeh_pe_inject_err()
  drivers/vfio: Support EEH error injection

 Documentation/vfio.txt  | 12 
 arch/powerpc/include/asm/eeh.h  |  9 +++---
 arch/powerpc/include/uapi/asm/eeh.h | 56 +
 arch/powerpc/kernel/eeh.c   | 35 +++
 drivers/vfio/vfio_spapr_eeh.c   | 10 +++
 include/uapi/linux/vfio.h   | 14 +-
 6 files changed, 130 insertions(+), 6 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/eeh.h

-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 2/3] powerpc/eeh: Introduce eeh_pe_inject_err()

2015-03-25 Thread Gavin Shan
The patch defines PCI error types and functions in uapi/asm/eeh.h
and exports function eeh_pe_inject_err(), which will be called by
VFIO driver to inject the specified PCI error to the indicated
PE for testing purposes.

Signed-off-by: Gavin Shan 
Reviewed-by: David Gibson 
---
 arch/powerpc/include/asm/eeh.h  |  2 ++
 arch/powerpc/include/uapi/asm/eeh.h | 26 ++
 arch/powerpc/kernel/eeh.c   | 35 +++
 3 files changed, 63 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 08c4042..cd6003b 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -291,6 +291,8 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option);
 int eeh_pe_get_state(struct eeh_pe *pe);
 int eeh_pe_reset(struct eeh_pe *pe, int option);
 int eeh_pe_configure(struct eeh_pe *pe);
+int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
+ unsigned long addr, unsigned long mask);
 
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
diff --git a/arch/powerpc/include/uapi/asm/eeh.h 
b/arch/powerpc/include/uapi/asm/eeh.h
index 8bb34b0..291b7d1 100644
--- a/arch/powerpc/include/uapi/asm/eeh.h
+++ b/arch/powerpc/include/uapi/asm/eeh.h
@@ -27,4 +27,30 @@
 #define EEH_PE_STATE_STOPPED_DMA   4   /* Stopped DMA only */
 #define EEH_PE_STATE_UNAVAIL   5   /* Unavailable  */
 
+/* EEH error types and functions */
+#define EEH_ERR_TYPE_320   /* 32-bits error
*/
+#define EEH_ERR_TYPE_641   /* 64-bits error
*/
+#define EEH_ERR_FUNC_MIN   0
+#define EEH_ERR_FUNC_LD_MEM_ADDR   0   /* Memory load  */
+#define EEH_ERR_FUNC_LD_MEM_DATA   1
+#define EEH_ERR_FUNC_LD_IO_ADDR2   /* IO load  */
+#define EEH_ERR_FUNC_LD_IO_DATA3
+#define EEH_ERR_FUNC_LD_CFG_ADDR   4   /* Config load  */
+#define EEH_ERR_FUNC_LD_CFG_DATA   5
+#define EEH_ERR_FUNC_ST_MEM_ADDR   6   /* Memory store */
+#define EEH_ERR_FUNC_ST_MEM_DATA   7
+#define EEH_ERR_FUNC_ST_IO_ADDR8   /* IO store */
+#define EEH_ERR_FUNC_ST_IO_DATA9
+#define EEH_ERR_FUNC_ST_CFG_ADDR   10  /* Config store */
+#define EEH_ERR_FUNC_ST_CFG_DATA   11
+#define EEH_ERR_FUNC_DMA_RD_ADDR   12  /* DMA read */
+#define EEH_ERR_FUNC_DMA_RD_DATA   13
+#define EEH_ERR_FUNC_DMA_RD_MASTER 14
+#define EEH_ERR_FUNC_DMA_RD_TARGET 15
+#define EEH_ERR_FUNC_DMA_WR_ADDR   16  /* DMA write*/
+#define EEH_ERR_FUNC_DMA_WR_DATA   17
+#define EEH_ERR_FUNC_DMA_WR_MASTER 18
+#define EEH_ERR_FUNC_DMA_WR_TARGET 19
+#define EEH_ERR_FUNC_MAX   19
+
 #endif /* _ASM_POWERPC_EEH_H */
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 76253eb..daa68a1 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1636,6 +1636,41 @@ int eeh_pe_configure(struct eeh_pe *pe)
 }
 EXPORT_SYMBOL_GPL(eeh_pe_configure);
 
+/**
+ * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE
+ * @pe: the indicated PE
+ * @type: error type
+ * @func: error function
+ * @addr: address
+ * @mask: address mask
+ *
+ * The routine is called to inject the specified PCI error, which
+ * is determined by @type and @func, to the indicated PE for
+ * testing purpose.
+ */
+int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
+ unsigned long addr, unsigned long mask)
+{
+   /* Invalid PE ? */
+   if (!pe)
+   return -ENODEV;
+
+   /* Unsupported operation ? */
+   if (!eeh_ops || !eeh_ops->err_inject)
+   return -ENOENT;
+
+   /* Check on PCI error type */
+   if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
+   return -EINVAL;
+
+   /* Check on PCI error function */
+   if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
+   return -EINVAL;
+
+   return eeh_ops->err_inject(pe, type, func, addr, mask);
+}
+EXPORT_SYMBOL_GPL(eeh_pe_inject_err);
+
 static int proc_eeh_show(struct seq_file *m, void *v)
 {
if (!eeh_enabled()) {
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v5 3/3] drivers/vfio: Support EEH error injection

2015-03-31 Thread Gavin Shan
On Tue, Mar 31, 2015 at 01:13:26PM -0600, Alex Williamson wrote:
>On Thu, 2015-03-26 at 16:42 +1100, Gavin Shan wrote:
>> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
>> to inject the specified EEH error, which is represented by
>> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
>> 
>> Signed-off-by: Gavin Shan 
>> Reviewed-by: David Gibson 
>> ---
>>  Documentation/vfio.txt| 12 
>>  drivers/vfio/vfio_spapr_eeh.c | 10 ++
>>  include/uapi/linux/vfio.h | 14 +-
>>  3 files changed, 35 insertions(+), 1 deletion(-)
>> 
>> diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
>> index 96978ec..4c746a7 100644
>> --- a/Documentation/vfio.txt
>> +++ b/Documentation/vfio.txt
>> @@ -385,6 +385,18 @@ The code flow from the example above should be slightly 
>> changed:
>>  
>>  
>>  
>> +/* Inject EEH error, which is expected to be caused by 32-bits
>> + * config load.
>> + */
>> +pe_op.op = VFIO_EEH_PE_INJECT_ERR;
>> +pe_op.err.type = EEH_ERR_TYPE_32;
>> +pe_op.err.func = EEH_ERR_FUNC_LD_CFG_ADDR;
>> +pe_op.err.addr = 0ul;
>> +pe_op.err.mask = 0ul;
>> +ioctl(container, VFIO_EEH_PE_OP, &pe_op);
>> +
>> +
>> +
>>  /* When 0xFF's returned from reading PCI config space or IO BARs
>>   * of the PCI device. Check the PE's state to see if that has been
>>   * frozen.
>> diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
>> index 5fa42db..38edeb4 100644
>> --- a/drivers/vfio/vfio_spapr_eeh.c
>> +++ b/drivers/vfio/vfio_spapr_eeh.c
>> @@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
>>  case VFIO_EEH_PE_CONFIGURE:
>>  ret = eeh_pe_configure(pe);
>>  break;
>> +case VFIO_EEH_PE_INJECT_ERR:
>> +minsz = offsetofend(struct vfio_eeh_pe_op, err.mask);
>> +if (op.argsz < minsz)
>> +return -EINVAL;
>> +if (copy_from_user(&op, (void __user *)arg, minsz))
>> +return -EFAULT;
>> +
>> +ret = eeh_pe_inject_err(pe, op.err.type, op.err.func,
>> +op.err.addr, op.err.mask);
>> +break;
>>  default:
>>  ret = -EINVAL;
>>  }
>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>> index 82889c3..d81c17f 100644
>> --- a/include/uapi/linux/vfio.h
>> +++ b/include/uapi/linux/vfio.h
>> @@ -468,12 +468,23 @@ struct vfio_iommu_spapr_tce_info {
>>   * - unfreeze IO/DMA for frozen PE;
>>   * - read PE state;
>>   * - reset PE;
>> - * - configure PE.
>> + * - configure PE;
>> + * - inject EEH error.
>>   */
>> +struct vfio_eeh_pe_err {
>> +__u32 type;
>> +__u32 func;
>> +__u64 addr;
>> +__u64 mask;
>> +};
>> +
>>  struct vfio_eeh_pe_op {
>>  __u32 argsz;
>>  __u32 flags;
>>  __u32 op;
>> +union {
>> +struct vfio_eeh_pe_err err;
>> +};
>>  };
>>  
>>  #define VFIO_EEH_PE_DISABLE 0   /* Disable EEH functionality */
>> @@ -490,6 +501,7 @@ struct vfio_eeh_pe_op {
>>  #define VFIO_EEH_PE_RESET_HOT   6   /* Assert hot reset 
>>  */
>>  #define VFIO_EEH_PE_RESET_FUNDAMENTAL   7   /* Assert fundamental 
>> reset  */
>>  #define VFIO_EEH_PE_CONFIGURE   8   /* PE configuration 
>>  */
>> +#define VFIO_EEH_PE_INJECT_ERR  9   /* Inject EEH error 
>>  */
>>  
>>  #define VFIO_EEH_PE_OP  _IO(VFIO_TYPE, VFIO_BASE + 21)
>>  
>
>I assume you want this to go in through the PPC tree, so
>
>Acked-by: Alex Williamson 
>

Thanks, Alex.W. Yes, it can go via the PPC tree.

Thanks,
Gavin

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/4] PCI: Export MSI message relevant functions

2014-09-04 Thread Gavin Shan
On Thu, Sep 04, 2014 at 04:57:36PM -0600, Bjorn Helgaas wrote:
>On Mon, May 19, 2014 at 01:01:07PM +1000, Gavin Shan wrote:
>> The patch exports 2 MSI message relevant functions, which will be
>> used by VFIO PCI driver. The VFIO PCI driver would be built as
>> a module.
>> 
>> Signed-off-by: Gavin Shan 
>
>Acked-by: Bjorn Helgaas 
>
>I think Alex will merge this along with the other ones.  Sorry this
>took so long.  I don't really like this, but I just can't figure out
>any solution that's better.
>

Thanks, Bjorn. I thought you must have forgotten this. Let's get it in first
and I'll do more investigation later to see if I can figure out something
better.

Thanks,
Gavin

>> ---
>>  drivers/pci/msi.c | 2 ++
>>  1 file changed, 2 insertions(+)
>> 
>> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
>> index 955ab79..2350271 100644
>> --- a/drivers/pci/msi.c
>> +++ b/drivers/pci/msi.c
>> @@ -324,6 +324,7 @@ void get_cached_msi_msg(unsigned int irq, struct msi_msg 
>> *msg)
>>  
>>  __get_cached_msi_msg(entry, msg);
>>  }
>> +EXPORT_SYMBOL_GPL(get_cached_msi_msg);
>>  
>>  void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
>>  {
>> @@ -368,6 +369,7 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg)
>>  
>>  __write_msi_msg(entry, msg);
>>  }
>> +EXPORT_SYMBOL_GPL(write_msi_msg);
>>  
>>  static void free_msi_irqs(struct pci_dev *dev)
>>  {
>> -- 
>> 1.8.3.2
>> 
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] vfio/pci: Restore MSIx message prior to enabling

2014-09-10 Thread Gavin Shan
On Mon, May 19, 2014 at 01:01:10PM +1000, Gavin Shan wrote:
>The MSIx vector table lives in device memory, which may be cleared as
>part of a backdoor device reset. This is the case on the IBM IPR HBA
>when the BIST is run on the device. When assigned to a QEMU guest,
>the guest driver does a pci_save_state(), issues a BIST, then does a
>pci_restore_state(). The BIST clears the MSIx vector table, but due
>to the way interrupts are configured the pci_restore_state() does not
>restore the vector table as expected. Eventually this results in an
>EEH error on Power platforms when the device attempts to signal an
>interrupt with the zero'd table entry.
>
>Fix the problem by restoring the host cached MSI message prior to
>enabling each vector.
>
>Reported-by: Wen Xiong 
>Signed-off-by: Gavin Shan 
>Signed-off-by: Alex Williamson 

Alex, please let me know if I need to resend this one to you. The patch
has been pending for a long time, and I'm not sure if you can still grab
it somewhere.

As you might see, Bjorn will take that one with PCI changes. This patch
depends on the changes.

Thanks,
Gavin

>---
> drivers/vfio/pci/vfio_pci_intrs.c | 15 +++
> 1 file changed, 15 insertions(+)
>
>diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
>b/drivers/vfio/pci/vfio_pci_intrs.c
>index 9dd49c9..553212f 100644
>--- a/drivers/vfio/pci/vfio_pci_intrs.c
>+++ b/drivers/vfio/pci/vfio_pci_intrs.c
>@@ -16,6 +16,7 @@
> #include 
> #include 
> #include 
>+#include 
> #include 
> #include 
> #include 
>@@ -548,6 +549,20 @@ static int vfio_msi_set_vector_signal(struct 
>vfio_pci_device *vdev,
>   return PTR_ERR(trigger);
>   }
>
>+  /*
>+   * The MSIx vector table resides in device memory which may be cleared
>+   * via backdoor resets. We don't allow direct access to the vector
>+   * table so even if a userspace driver attempts to save/restore around
>+   * such a reset it would be unsuccessful. To avoid this, restore the
>+   * cached value of the message prior to enabling.
>+   */
>+  if (msix) {
>+  struct msi_msg msg;
>+
>+  get_cached_msi_msg(irq, &msg);
>+  write_msi_msg(irq, &msg);
>+  }
>+
>   ret = request_irq(irq, vfio_msihandler, 0,
> vdev->ctx[vector].name, trigger);
>   if (ret) {
>-- 
>1.8.3.2
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] vfio/pci: Restore MSIx message prior to enabling

2014-09-25 Thread Gavin Shan
On Wed, Sep 10, 2014 at 06:13:42PM +1000, Gavin Shan wrote:
>On Mon, May 19, 2014 at 01:01:10PM +1000, Gavin Shan wrote:
>>The MSIx vector table lives in device memory, which may be cleared as
>>part of a backdoor device reset. This is the case on the IBM IPR HBA
>>when the BIST is run on the device. When assigned to a QEMU guest,
>>the guest driver does a pci_save_state(), issues a BIST, then does a
>>pci_restore_state(). The BIST clears the MSIx vector table, but due
>>to the way interrupts are configured the pci_restore_state() does not
>>restore the vector table as expected. Eventually this results in an
>>EEH error on Power platforms when the device attempts to signal an
>>interrupt with the zero'd table entry.
>>
>>Fix the problem by restoring the host cached MSI message prior to
>>enabling each vector.
>>
>>Reported-by: Wen Xiong 
>>Signed-off-by: Gavin Shan 
>>Signed-off-by: Alex Williamson 
>
>Alex, please let me know if I need resend this one to you. The patch
>has been pending for long time, I'm not sure if you still can grab
>it somewhere.
>
>As you might see, Bjorn will take that one with PCI changes. This patch
>depends on the changes.
>

Alex, I guess you probably missed my last reply. Bjorn acked the first
patch and you can pick both of them if I understand correctly. Please
let me know if I need to resend those 2 patches.

Thanks,
Gavin

>Thanks,
>Gavin
>
>>---
>> drivers/vfio/pci/vfio_pci_intrs.c | 15 +++
>> 1 file changed, 15 insertions(+)
>>
>>diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
>>b/drivers/vfio/pci/vfio_pci_intrs.c
>>index 9dd49c9..553212f 100644
>>--- a/drivers/vfio/pci/vfio_pci_intrs.c
>>+++ b/drivers/vfio/pci/vfio_pci_intrs.c
>>@@ -16,6 +16,7 @@
>> #include 
>> #include 
>> #include 
>>+#include 
>> #include 
>> #include 
>> #include 
>>@@ -548,6 +549,20 @@ static int vfio_msi_set_vector_signal(struct 
>>vfio_pci_device *vdev,
>>  return PTR_ERR(trigger);
>>  }
>>
>>+ /*
>>+  * The MSIx vector table resides in device memory which may be cleared
>>+  * via backdoor resets. We don't allow direct access to the vector
>>+  * table so even if a userspace driver attempts to save/restore around
>>+  * such a reset it would be unsuccessful. To avoid this, restore the
>>+  * cached value of the message prior to enabling.
>>+  */
>>+ if (msix) {
>>+ struct msi_msg msg;
>>+
>>+ get_cached_msi_msg(irq, &msg);
>>+ write_msi_msg(irq, &msg);
>>+ }
>>+
>>  ret = request_irq(irq, vfio_msihandler, 0,
>>vdev->ctx[vector].name, trigger);
>>  if (ret) {
>>-- 
>>1.8.3.2
>>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/4] vfio/pci: Restore MSIx message prior to enabling

2014-09-26 Thread Gavin Shan
On Thu, Sep 25, 2014 at 09:46:44PM -0600, Alex Williamson wrote:
>On Fri, 2014-09-26 at 13:19 +1000, Gavin Shan wrote:
>> On Wed, Sep 10, 2014 at 06:13:42PM +1000, Gavin Shan wrote:
>> >On Mon, May 19, 2014 at 01:01:10PM +1000, Gavin Shan wrote:
>> >>The MSIx vector table lives in device memory, which may be cleared as
>> >>part of a backdoor device reset. This is the case on the IBM IPR HBA
>> >>when the BIST is run on the device. When assigned to a QEMU guest,
>> >>the guest driver does a pci_save_state(), issues a BIST, then does a
>> >>pci_restore_state(). The BIST clears the MSIx vector table, but due
>> >>to the way interrupts are configured the pci_restore_state() does not
>> >>restore the vector table as expected. Eventually this results in an
>> >>EEH error on Power platforms when the device attempts to signal an
>> >>interrupt with the zero'd table entry.
>> >>
>> >>Fix the problem by restoring the host cached MSI message prior to
>> >>enabling each vector.
>> >>
>> >>Reported-by: Wen Xiong 
>> >>Signed-off-by: Gavin Shan 
>> >>Signed-off-by: Alex Williamson 
>> >
>> >Alex, please let me know if I need resend this one to you. The patch
>> >has been pending for long time, I'm not sure if you still can grab
>> >it somewhere.
>> >
>> >As you might see, Bjorn will take that one with PCI changes. This patch
>> >depends on the changes.
>> >
>> 
>> Alex, I guess you probably missed last reply. Bjorn acked the first
>> patch and you can pick both of them if I understand correctly. Please
>> let me know if I need resend those 2 patches?
>
>Please update the patches, add Bjorn's ACK, test and resend.  I'd like
>to at least know that it still applies and resolves the problem on the
>current code base since the patch is 4 months old.  Thanks,
>

Retested, and it still helps avoid the unexpected EEH error as before,
though the error caused by the lost MSIx message is eventually
propagated to the guest and the adapter is recovered successfully by the
"EEH support for guest" feature. I'll resend it with Bjorn's ack.

Thanks,
Gavin

>Alex
>
>> >>---
>> >> drivers/vfio/pci/vfio_pci_intrs.c | 15 +++
>> >> 1 file changed, 15 insertions(+)
>> >>
>> >>diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
>> >>b/drivers/vfio/pci/vfio_pci_intrs.c
>> >>index 9dd49c9..553212f 100644
>> >>--- a/drivers/vfio/pci/vfio_pci_intrs.c
>> >>+++ b/drivers/vfio/pci/vfio_pci_intrs.c
>> >>@@ -16,6 +16,7 @@
>> >> #include 
>> >> #include 
>> >> #include 
>> >>+#include 
>> >> #include 
>> >> #include 
>> >> #include 
>> >>@@ -548,6 +549,20 @@ static int vfio_msi_set_vector_signal(struct 
>> >>vfio_pci_device *vdev,
>> >>   return PTR_ERR(trigger);
>> >>   }
>> >>
>> >>+  /*
>> >>+   * The MSIx vector table resides in device memory which may be cleared
>> >>+   * via backdoor resets. We don't allow direct access to the vector
>> >>+   * table so even if a userspace driver attempts to save/restore around
>> >>+   * such a reset it would be unsuccessful. To avoid this, restore the
>> >>+   * cached value of the message prior to enabling.
>> >>+   */
>> >>+  if (msix) {
>> >>+  struct msi_msg msg;
>> >>+
>> >>+  get_cached_msi_msg(irq, &msg);
>> >>+  write_msi_msg(irq, &msg);
>> >>+  }
>> >>+
>> >>   ret = request_irq(irq, vfio_msihandler, 0,
>> >> vdev->ctx[vector].name, trigger);
>> >>   if (ret) {
>> >>-- 
>> >>1.8.3.2
>> >>
>> 
>
>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RESEND PATCH v3 2/2] vfio/pci: Restore MSIx message prior to enabling

2014-09-26 Thread Gavin Shan
The MSIx vector table lives in device memory, which may be cleared as
part of a backdoor device reset. This is the case on the IBM IPR HBA
when the BIST is run on the device. When assigned to a QEMU guest,
the guest driver does a pci_save_state(), issues a BIST, then does a
pci_restore_state(). The BIST clears the MSIx vector table, but due
to the way interrupts are configured the pci_restore_state() does not
restore the vector table as expected. Eventually this results in an
EEH error on Power platforms when the device attempts to signal an
interrupt with the zero'd table entry.

Fix the problem by restoring the host cached MSI message prior to
enabling each vector.

Reported-by: Wen Xiong 
Signed-off-by: Gavin Shan 
Signed-off-by: Alex Williamson 
---
 drivers/vfio/pci/vfio_pci_intrs.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
b/drivers/vfio/pci/vfio_pci_intrs.c
index 9dd49c9..553212f 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -548,6 +549,20 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
return PTR_ERR(trigger);
}
 
+   /*
+* The MSIx vector table resides in device memory which may be cleared
+* via backdoor resets. We don't allow direct access to the vector
+* table so even if a userspace driver attempts to save/restore around
+* such a reset it would be unsuccessful. To avoid this, restore the
+* cached value of the message prior to enabling.
+*/
+   if (msix) {
+   struct msi_msg msg;
+
+   get_cached_msi_msg(irq, &msg);
+   write_msi_msg(irq, &msg);
+   }
+
ret = request_irq(irq, vfio_msihandler, 0,
  vdev->ctx[vector].name, trigger);
if (ret) {
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RESEND PATCH v3 1/2] PCI: Export MSI message relevant functions

2014-09-26 Thread Gavin Shan
The patch exports 2 MSI message relevant functions, which will be
used by VFIO PCI driver. The VFIO PCI driver would be built as
a module.

Signed-off-by: Gavin Shan 
Acked-by: Bjorn Helgaas 
---
 drivers/pci/msi.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 5a40516..e468d65 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -312,6 +312,7 @@ void get_cached_msi_msg(unsigned int irq, struct msi_msg 
*msg)
 
__get_cached_msi_msg(entry, msg);
 }
+EXPORT_SYMBOL_GPL(get_cached_msi_msg);
 
 void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 {
@@ -356,6 +357,7 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg)
 
__write_msi_msg(entry, msg);
 }
+EXPORT_SYMBOL_GPL(write_msi_msg);
 
 static void free_msi_irqs(struct pci_dev *dev)
 {
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] drivers/vfio: Export vfio_spapr_iommu_eeh_ioctl() with GPL

2014-09-26 Thread Gavin Shan
The function should have been exported with EXPORT_SYMBOL_GPL()
as part of commit 92d18a68 ("drivers/vfio: Fix EEH build error").

Suggested-by: Alexey Kardashevskiy 
Signed-off-by: Gavin Shan 
---
 drivers/vfio/vfio_spapr_eeh.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 86dfceb..5fa42db 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -92,7 +92,7 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 
return ret;
 }
-EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl);
+EXPORT_SYMBOL_GPL(vfio_spapr_iommu_eeh_ioctl);
 
 MODULE_VERSION(DRIVER_VERSION);
 MODULE_LICENSE("GPL v2");
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] drivers/vfio/pci: Fix wrong MSI interrupt count

2014-03-02 Thread Gavin Shan
According to the PCI local bus specification, bit#0 of the Message
Control register for MSI (offset: 2, length: 2) enables or disables
the MSI logic, so it shouldn't contribute to the calculation of the
MSI interrupt count.

The patch fixes the above issue. Also, the patch renames the local variable
"flags" to "ctl" for both the MSI and MSIx cases.

Signed-off-by: Gavin Shan 
---
 drivers/vfio/pci/vfio_pci.c |   14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 7ba0424..5760ea6 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -190,25 +190,23 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device 
*vdev, int irq_type)
 
} else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
u8 pos;
-   u16 flags;
+   u16 ctl;
 
pos = vdev->pdev->msi_cap;
if (pos) {
pci_read_config_word(vdev->pdev,
-pos + PCI_MSI_FLAGS, &flags);
-
-   return 1 << (flags & PCI_MSI_FLAGS_QMASK);
+pos + PCI_MSI_FLAGS, &ctl);
+   return 1 << ((ctl & PCI_MSI_FLAGS_QMASK) >> 1);
}
} else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) {
u8 pos;
-   u16 flags;
+   u16 ctl;
 
pos = vdev->pdev->msix_cap;
if (pos) {
pci_read_config_word(vdev->pdev,
-pos + PCI_MSIX_FLAGS, &flags);
-
-   return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;
+pos + PCI_MSIX_FLAGS, &ctl);
+   return (ctl & PCI_MSIX_FLAGS_QSIZE) + 1;
}
} else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX)
if (pci_is_pcie(vdev->pdev))
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] drivers/vfio: Rework offsetofend()

2014-03-02 Thread Gavin Shan
The macro offsetofend() introduces an unnecessary temporary variable
"tmp". The patch avoids that and saves a bit of stack memory.

Signed-off-by: Gavin Shan 
---
 include/linux/vfio.h |5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 24579a0..43f6bf4 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -86,9 +86,8 @@ extern void vfio_unregister_iommu_driver(
  * from user space.  This allows us to easily determine if the provided
  * structure is sized to include various fields.
  */
-#define offsetofend(TYPE, MEMBER) ({   \
-   TYPE tmp;   \
-   offsetof(TYPE, MEMBER) + sizeof(tmp.MEMBER); }) \
+#define offsetofend(TYPE, MEMBER) \
+   (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
 
 /*
  * External user API
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] drivers/vfio/pci: Fix MSIx message lost

2014-03-02 Thread Gavin Shan
The problem is specific to the case of a BIST reset issued to the IPR
adapter on the guest side. After the BIST reset, we lose everything
in the MSIx table and we never get a chance to write the MSIx messages
for the enabled interrupts back to the MSIx table.

The patch fixes it by writing the MSIx messages to the MSIx table
before reenabling the interrupts.

Reported-by: Wen Xiong 
Signed-off-by: Gavin Shan 
---
 drivers/vfio/pci/vfio_pci_intrs.c |   19 +++
 1 file changed, 19 insertions(+)

diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
b/drivers/vfio/pci/vfio_pci_intrs.c
index 2103576..279ebd0 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -517,6 +518,7 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
struct pci_dev *pdev = vdev->pdev;
int irq = msix ? vdev->msix[vector].vector : pdev->irq + vector;
char *name = msix ? "vfio-msix" : "vfio-msi";
+   struct msi_msg msg;
struct eventfd_ctx *trigger;
int ret;
 
@@ -544,6 +546,23 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
return PTR_ERR(trigger);
}
 
+   /* We possibly lose the MSI/MSIx message in some cases.
+* For example, BIST reset on IPR adapter. The MSIx table
+* is cleaned out. However, we never get chance to put
+* MSIx messages to MSIx table because all MSIx stuff is
+* being cached in QEMU. Here, we had the trick to put the
+* MSI/MSIx message back.
+*
+* Basically, we needn't worry about MSI messages. However,
+* it's not harmful and there might be cases of PCI config data
+* lost because of cached PCI config data in QEMU again.
+*
+* Note that we should flash the message prior to enabling
+* the corresponding interrupt by request_irq().
+*/
+get_cached_msi_msg(irq, &msg);
+write_msi_msg(irq, &msg);
+
ret = request_irq(irq, vfio_msihandler, 0,
  vdev->ctx[vector].name, trigger);
if (ret) {
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] drivers/vfio/pci: Fix wrong MSI interrupt count

2014-03-09 Thread Gavin Shan
According to the PCI local bus specification, bit#0 of the Message
Control register for MSI (offset: 2, length: 2) enables or disables
the MSI logic, so it shouldn't contribute to the calculation of the
MSI interrupt count. The patch fixes the issue.

Signed-off-by: Gavin Shan 
---
 drivers/vfio/pci/vfio_pci.c |3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 7ba0424..6b8cd07 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -196,8 +196,7 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device 
*vdev, int irq_type)
if (pos) {
pci_read_config_word(vdev->pdev,
 pos + PCI_MSI_FLAGS, &flags);
-
-   return 1 << (flags & PCI_MSI_FLAGS_QMASK);
+   return 1 << ((flags & PCI_MSI_FLAGS_QMASK) >> 1);
}
} else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) {
u8 pos;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] drivers/vfio: Rework offsetofend()

2014-03-09 Thread Gavin Shan
The macro offsetofend() introduces an unnecessary temporary variable
"tmp". The patch avoids that and saves a bit of stack memory.

Signed-off-by: Gavin Shan 
---
 include/linux/vfio.h |5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 24579a0..43f6bf4 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -86,9 +86,8 @@ extern void vfio_unregister_iommu_driver(
  * from user space.  This allows us to easily determine if the provided
  * structure is sized to include various fields.
  */
-#define offsetofend(TYPE, MEMBER) ({   \
-   TYPE tmp;   \
-   offsetof(TYPE, MEMBER) + sizeof(tmp.MEMBER); }) \
+#define offsetofend(TYPE, MEMBER) \
+   (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
 
 /*
  * External user API
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/3] VFIO Bug Fixes

2014-03-09 Thread Gavin Shan
v1 -> v2:
* Don't change the name of variable "flags" in [PATCH 2/3].
* Comment and commit log cleanup in [PATCH 3/3].

Gavin Shan (3):
  drivers/vfio: Rework offsetofend()
  drivers/vfio/pci: Fix wrong MSI interrupt count
  drivers/vfio/pci: Fix MSIx message lost

---

 drivers/vfio/pci/vfio_pci.c   |3 +--
 drivers/vfio/pci/vfio_pci_intrs.c |   11 +++
 include/linux/vfio.h  |5 ++---
 3 files changed, 14 insertions(+), 5 deletions(-)

Thanks,
Gavin

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] drivers/vfio/pci: Fix MSIx message lost

2014-03-09 Thread Gavin Shan
The problem is specific to the case of a BIST reset issued to the IPR
adapter on the guest side. The IPR driver calls pci_save_state(),
issues a BIST reset and then calls pci_restore_state(). The BIST
clears the MSIx table and pci_restore_state() doesn't restore
the MSIx table as expected. Eventually, MSIx messages with all
zeros are sent, causing an EEH error on the Power platform.

The patch fixes it by writing the MSIx messages to the MSIx table before
reenabling the individual interrupts in the following path:

qemu/hw/pci/msix.c::msix_table_mmio_write
msix_handle_mask_update
msix_fire_vector_notifier
qemu/hw/misc/vfio.c::vfio_msix_vector_use
vfio_msix_vector_do_use

IOCTL Command VFIO_DEVICE_SET_IRQS to VFIO-PCI
vfio/pci/vfio_pci.c::vfio_pci_ioctl
vfio/pci/vfio_pci_intrs.c::vfio_pci_set_irqs_ioctl
vfio_pci_set_msi_trigger
vfio_msi_set_block
vfio_msi_set_vector_signal

Reported-by: Wen Xiong 
Signed-off-by: Gavin Shan 
---
 drivers/vfio/pci/vfio_pci_intrs.c |   11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
b/drivers/vfio/pci/vfio_pci_intrs.c
index 2103576..83e0638 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -517,6 +518,7 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
struct pci_dev *pdev = vdev->pdev;
int irq = msix ? vdev->msix[vector].vector : pdev->irq + vector;
char *name = msix ? "vfio-msix" : "vfio-msi";
+   struct msi_msg msg;
struct eventfd_ctx *trigger;
int ret;
 
@@ -544,6 +546,15 @@ static int vfio_msi_set_vector_signal(struct 
vfio_pci_device *vdev,
return PTR_ERR(trigger);
}
 
+   /* We possibly lose the MSI/MSIx message in some cases, one
+* of which is pci_save_state(), BIST reset and pci_restore_state()
+* for IPR adapter. The BIST reset cleans out MSIx table and we
+* don't have chance to restore it. Here, we have the trick to
+* restore it before enabling individual interrupts. For MSI messages,
+* it's harmless to write them back.
+*/
+   get_cached_msi_msg(irq, &msg);
+   write_msi_msg(irq, &msg);
ret = request_irq(irq, vfio_msihandler, 0,
  vdev->ctx[vector].name, trigger);
if (ret) {
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/22] powerpc: Introduce CONFIG_KVM_EEH

2014-05-04 Thread Gavin Shan
The patch introduces the kernel configuration option KVM_EEH, which
depends on PPC_POWERNV, KVM_BOOK3S_64, EEH and VFIO_IOMMU_SPAPR_TCE.
The option enables emulation of the EEH RTAS services that are
required by the EEH module in pSeries-based guests.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/kvm/Kconfig | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 141b202..743d2d9 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -189,6 +189,14 @@ config KVM_XICS
  Specification) interrupt controller architecture used on
  IBM POWER (pSeries) servers.
 
+config KVM_EEH
+   bool "KVM in-kernel EEH RTAS emulation"
+   depends on PPC_POWERNV && KVM_BOOK3S_64 && EEH && VFIO_IOMMU_SPAPR_TCE
+   default y
+   ---help---
+ Enable support for emulating EEH RTAS services used on IBM
+ POWER (pSeries) servers.
+
 source drivers/vhost/Kconfig
 
 endif # VIRTUALIZATION
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/22] powerpc/eeh: Release VFIO dev on VM destruction

2014-05-04 Thread Gavin Shan
When the VM is destroyed, the EEH devices and PEs that have been
marked as being owned by the guest should be returned to the host. The
patch introduces kvmppc_eeh_vfio_release() to do it.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/include/asm/eeh.h |  6 +-
 arch/powerpc/kernel/eeh_pe.c   | 42 ++
 arch/powerpc/kvm/book3s_hv.c   |  2 ++
 3 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 3807167..677c719 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -380,6 +380,8 @@ static inline void eeh_add_sysfs_files(struct pci_bus *bus) 
{ }
 
 static inline void eeh_remove_device(struct pci_dev *dev) { }
 
+static inline void kvmppc_eeh_vfio_release(struct kvm *kvm) { }
+
 #define EEH_POSSIBLE_ERROR(val, type) (0)
 #define EEH_IO_ERROR_VALUE(size) (-1UL)
 #endif /* CONFIG_EEH */
@@ -388,7 +390,9 @@ static inline void eeh_remove_device(struct pci_dev *dev) { 
}
 #ifdef CONFIG_KVM_EEH
 struct eeh_dev *eeh_vfio_dev_get(struct eeh_vfio_pci_addr *addr);
 struct eeh_pe *eeh_vfio_pe_get(struct eeh_vfio_pci_addr *addr);
-
+void kvmppc_eeh_vfio_release(struct kvm *kvm);
+#else
+static inline void kvmppc_eeh_vfio_release(void *kvm) { };
 #endif /* CONFIG_KVM_EEH */
 
 #ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 1bd7b1f..9e73188 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -331,6 +331,48 @@ struct eeh_dev *eeh_vfio_dev_get(struct eeh_vfio_pci_addr 
*addr)
 
return NULL;
 }
+
+static void *__kvmppc_eeh_vfio_release(void *data, void *flag)
+{
+   struct eeh_pe *pe = (struct eeh_pe *)data;
+   struct kvm *kvm = (struct kvm *)flag;
+   struct eeh_dev *edev, *tmp;
+
+   if (!eeh_pe_passed(pe))
+   return NULL;
+
+   eeh_pe_for_each_dev(pe, edev, tmp) {
+   if (!eeh_dev_passed(edev))
+   continue;
+
+   if (edev->gaddr.kvm == kvm)
+   eeh_dev_set_passed(edev, false);
+   }
+
+   eeh_pe_set_passed(pe, false);
+
+   return NULL;
+}
+
+/**
+ * kvmppc_eeh_vfio_release - Release VFIO devices for the given VM
+ * @kvm: VM indicator
+ *
+ * The function is expected to be called while the VM is destroyed.
+ * In turn, the PCI devices that have been passed to that VM should
+ * be released and their address mapping maintained will be destroyed.
+ */
+void kvmppc_eeh_vfio_release(struct kvm *kvm)
+{
+   struct eeh_pe *root;
+   void *ret;
+
+   list_for_each_entry(root, &eeh_phb_pe, child) {
+   ret = eeh_pe_traverse(root, __kvmppc_eeh_vfio_release, kvm);
+   if (ret) return;
+   }
+}
+EXPORT_SYMBOL_GPL(kvmppc_eeh_vfio_release);
 #endif /* CONFIG_KVM_EEH */
 
 /**
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8227dba..f07a12d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -49,6 +49,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2344,6 +2345,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
kvm->arch.rma = NULL;
}
 
+   kvmppc_eeh_vfio_release(kvm);
kvmppc_free_hpt(kvm);
 }
 
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 20/22] powerpc/kvm: Infrastructure for error injection

2014-05-04 Thread Gavin Shan
The patch implements the infrastructure for error injection.
The RTAS calls "ibm,{open-errinjct, close-errinjct, errinjct}" are handled
in the host directly. Each VM is allowed to have only one open token at
a time.

There are multiple types of error injection to be supported by the system,
so we maintain an array of handlers with the error type as index. The array
supports dynamic registration.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/include/asm/book3s_errinjct.h |  78 +++
 arch/powerpc/kvm/Makefile  |   3 +
 arch/powerpc/kvm/book3s_errinjct.c | 329 +
 arch/powerpc/kvm/book3s_rtas.c |  29 ++-
 4 files changed, 438 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/include/asm/book3s_errinjct.h
 create mode 100644 arch/powerpc/kvm/book3s_errinjct.c

diff --git a/arch/powerpc/include/asm/book3s_errinjct.h 
b/arch/powerpc/include/asm/book3s_errinjct.h
new file mode 100644
index 000..35712be
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s_errinjct.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2014.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __POWERPC_BOOK3S_ERRINJCT_H__
+#define __POWERPC_BOOK3S_ERRINJCT_H__
+
+/* Error injection handler */
+typedef int (*kvm_errinjct_func)(struct kvm_vcpu *vcpu, rtas_arg_t buf);
+
+#ifdef CONFIG_KVM_ERRINJCT
+
+/* RTAS services for error injection */
+enum {
+   kvm_errinjct_open_token,
+   kvm_errinjct_close_token,
+   kvm_errinjct_errinjct
+};
+
+/* Supported types of error injection */
+enum {
+   kvm_errinjct_min = 0,
+   kvm_errinjct_fatal,
+   kvm_errinjct_recover_random_evt,
+   kvm_errinjct_recover_special_evt,
+   kvm_errinjct_corrupted_page,
+   kvm_errinjct_corrupted_slb,
+   kvm_errinjct_translator_failure,
+   kvm_errinjct_ioa_bus_error,
+   kvm_errinjct_ioa_bus_error_64,
+   kvm_errinjct_platform_specific,
+   kvm_errinjct_corrupted_dcache_start,
+   kvm_errinjct_corrupted_dcache_end,
+   kvm_errinjct_corrupted_icache_start,
+   kvm_errinjct_corrupted_icache_end,
+   kvm_errinjct_corrupted_tlb_start,
+   kvm_errinjct_corrupted_tlb_end,
+   kvm_errinjct_upstream_io_error,
+   kvm_errinjct_max
+};
+
+/* Handler for specific type of error injection */
+struct kvm_errinjct_handler {
+   int opcode;
+   kvm_errinjct_func handler;
+};
+
+/* Tokens that have been opened */
+struct kvm_errinjct_token {
+   struct kvm *kvm;
+   int token;
+   struct list_head list;
+};
+
+int kvm_errinjct_register(int opcode, kvm_errinjct_func handler);
+int kvm_errinjct_unregister(int opcode);
+void kvmppc_errinjct_rtas(struct kvm_vcpu *vcpu,
+ struct rtas_args *args, int flag);
+
+#else
+
+static inline int kvm_errinjct_register(int opcode,
+   kvm_errinjct_func handler)
+{
+   return 0;
+}
+
+static inline int kvm_errinjct_unregister(int opcode);
+{
+   return 0;
+}
+
+#endif /* CONFIG_KVM_ERRINJCT */
+#endif /* __POWERPC_BOOK3S_ERRINJCT_H__ */
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 673038d..f221f66 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -97,6 +97,9 @@ endif
 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
book3s_xics.o
 
+kvm-book3s_64-objs-$(CONFIG_KVM_ERRINJCT) += \
+   book3s_errinjct.o
+
 kvm-book3s_64-objs-$(CONFIG_KVM_VFIO) += \
$(addprefix ../../../virt/kvm/, vfio.o)
 
diff --git a/arch/powerpc/kvm/book3s_errinjct.c 
b/arch/powerpc/kvm/book3s_errinjct.c
new file mode 100644
index 000..27a49ab
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_errinjct.c
@@ -0,0 +1,329 @@
+/*
+ * The file intends to implement RTAS errinjct functionality for book3s
+ * architecture. Due to the individual errors injected to the system
+ * are defined by device tree node, it's reasonable to introduce the
+ * mechanism to register the supported errors and their corresponding
+ * handlers.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2014.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+static struct kvm_errinjct_handler handlers[kvm_errinjct_max];
+static DEFINE_SPINLOCK(handler_lock);
+static LIST_HEAD(open_token_list);
+static DEFINE_SPINLOCK(token_lock);
+static unsigned long *token_bitmap = NULL;
+static int token_max = 1024;
+
+/**
+ * kvm_errinjct_register - Regi

[PATCH 22/22] powerpc/powernv: Support PCI error injection

2014-05-04 Thread Gavin Shan
The patch introduces the infrastructure of the error injection backend
for the PowerNV platform. For now, we just implement the logic to inject
PCI errors. We need to support injecting other types of errors in the
future.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/include/asm/book3s_errinjct.h |  19 +++
 arch/powerpc/platforms/powernv/Makefile|   1 +
 arch/powerpc/platforms/powernv/errinjct.c  | 215 +
 3 files changed, 235 insertions(+)
 create mode 100644 arch/powerpc/platforms/powernv/errinjct.c

diff --git a/arch/powerpc/include/asm/book3s_errinjct.h 
b/arch/powerpc/include/asm/book3s_errinjct.h
index 35712be..75443ad 100644
--- a/arch/powerpc/include/asm/book3s_errinjct.h
+++ b/arch/powerpc/include/asm/book3s_errinjct.h
@@ -56,6 +56,25 @@ struct kvm_errinjct_token {
struct list_head list;
 };
 
+/* Argument buffer for various operations */
+struct kvm_errinjct_ioa_bus {
+   uint32_t addr;
+   uint32_t mask;
+   uint32_t cfg_addr;
+   uint32_t buid_hi;
+   uint32_t buid_lo;
+   uint32_t op;
+};
+
+struct kvm_errinjct_ioa_bus64 {
+   uint64_t addr;
+   uint64_t mask;
+   uint32_t cfg_addr;
+   uint32_t buid_hi;
+   uint32_t buid_lo;
+   uint32_t op;
+};
+
 int kvm_errinjct_register(int opcode, kvm_errinjct_func handler);
 int kvm_errinjct_unregister(int opcode);
 void kvmppc_errinjct_rtas(struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/platforms/powernv/Makefile 
b/arch/powerpc/platforms/powernv/Makefile
index d8ea670..d096b18 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -7,5 +7,6 @@ obj-$(CONFIG_SMP)   += smp.o
 obj-$(CONFIG_PCI)  += pci.o pci-p5ioc2.o pci-ioda.o
 obj-$(CONFIG_EEH)  += eeh-ioda.o eeh-powernv.o
 obj-$(CONFIG_KVM_EEH)  += eeh-rtas.o
+obj-$(CONFIG_KVM_ERRINJCT) += errinjct.o
 obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
 obj-$(CONFIG_MEMORY_FAILURE)   += opal-memory-errors.o
diff --git a/arch/powerpc/platforms/powernv/errinjct.c 
b/arch/powerpc/platforms/powernv/errinjct.c
new file mode 100644
index 000..ccc7853
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/errinjct.c
@@ -0,0 +1,215 @@
+/*
+ * Backend for error injection implemented on PowerNV platform.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2014.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "powernv.h"
+#include "pci.h"
+
+static int powernv_errinjct_ioa(struct kvm_vcpu *vcpu, rtas_arg_t buf)
+{
+   struct OpalErrinjct ej;
+   struct kvm_errinjct_ioa_bus args;
+   struct eeh_vfio_pci_addr addr;
+   struct eeh_pe *pe;
+   struct pnv_phb *phb;
+   long rc;
+   int ret = 0;
+
+   /* Word aligned buffer */
+   if (buf & 0x3) {
+   ret = -3;
+   goto out;
+   }
+
+   /* Copy over argument */
+   ret = kvm_read_guest(vcpu->kvm, buf, &args, sizeof(args));
+   if (ret) {
+   pr_warn("%s: Can't copyover arguments (%d)\n",
+   __func__, ret);
+   ret = -3;
+   goto out;
+   }
+
+   /*
+* Sanity check on operation. We don't support optional
+* operation (20) and last one (21) for now.
+*/
+   if (args.op < 0 || args.op > 21) {
+   ret = -3;
+   goto out;
+   } else if (args.op >= 20) {
+   ret = -1;
+   goto out;
+   }
+
+   /*
+* Only do error injection on passthrou PE. It's notable
+* the "cfg_addr" is guest PE address
+*/
+   addr.kvm = vcpu->kvm;
+   addr.buid_hi = args.buid_hi;
+   addr.buid_lo = args.buid_lo;
+   addr.pe_addr = args.cfg_addr;
+   pe = eeh_vfio_pe_get(&addr);
+   if (!pe) {
+   pr_warn("%s: Can't find passed PE (%08x-%08x-%08x)\n",
+   __func__, args.buid_hi, args.buid_lo, args.cfg_addr);
+   ret = -3;
+   goto out;
+   }
+
+   /*
+* Calling to OPAL API. We need host PE address
+* and PHB host BUID.
+*/
+   phb = pe->phb->private_data;
+
+   ej.type = OpalErrinjctTypeIoaBusError;
+   ej.ioa.addr = args.addr;
+   ej.ioa.mask = args.mask;
+   ej.ioa.phb_id   = phb->opal_id;
+   ej.ioa.pe   = pe->addr;
+   ej.ioa.function =  args.op;
+   rc = opal_err_injct(&ej);
+   if (rc != OPAL_SUCCESS) {
+   pr_warn(&qu

[PATCH 13/22] powerpc/eeh: Emulate RTAS call ibm,read-slot-reset-state2

2014-05-04 Thread Gavin Shan
The RTAS call "ibm,read-slot-reset-state2" is being used to retrieve
the various states of the specified PE, e.g. reset state, frozen DMA,
frozen MMIO etc. The patch implements the backend to emulate the
RTAS call.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/platforms/powernv/eeh-rtas.c | 77 +++
 1 file changed, 77 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-rtas.c 
b/arch/powerpc/platforms/powernv/eeh-rtas.c
index 3e38d13..031ee8c 100644
--- a/arch/powerpc/platforms/powernv/eeh-rtas.c
+++ b/arch/powerpc/platforms/powernv/eeh-rtas.c
@@ -260,6 +260,80 @@ out:
return ret;
 }
 
+/*
+ * Emulate the RTAS call "ibm,read-slot-reset-state2".
+ *
+ * The call must come with 3 inputs and 4 or 5 outputs. rets[2] tells
+ * the guest whether EEH is supported on the PHB, rets[1] carries the
+ * PE state code derived from the backend's get_state() flags, and
+ * rets[3] is only written (with 1000) together with state code 5.
+ *
+ * Returns 0 on success, or -3 for bad parameters, an address that can't
+ * be resolved, EEH being disabled on the PHB or a missing backend.
+ */
+static int kvmppc_eeh_get_state2(struct kvm_vcpu *vcpu,
+				 struct rtas_args *args)
+{
+	struct eeh_vfio_pci_addr addr;
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	struct eeh_dev *edev;
+	struct eeh_pe *pe;
+	bool in_reset, dma_on, mmio_on;
+	int state;
+
+	/* 3 inputs, 4 or 5 outputs - anything else is malformed */
+	if (args->nargs != 3 || (args->nret != 4 && args->nret != 5)) {
+		pr_warn("%s: Non-matched argument (%d, %d) - (3, 4/5)\n",
+			__func__, args->nargs, args->nret);
+		return -3;
+	}
+
+	/* Resolve the address; this hands back the EEH device and PE */
+	if (kvmppc_eeh_format_addr(vcpu, args, &addr, false, &edev, &pe))
+		return -3;
+
+	/* The PHB must have EEH up and running */
+	hose = pe->phb;
+	phb = hose->private_data;
+	if (!(phb->flags & PNV_PHB_FLAG_EEH)) {
+		pr_warn("%s: EEH disabled on PHB#%d\n",
+			__func__, hose->global_number);
+		args->rets[2] = 0;
+		return -3;
+	}
+
+	/*
+	 * Flag EEH as supported on the PCI device, otherwise the
+	 * PE state means nothing to the guest.
+	 */
+	args->rets[2] = 1;
+
+	/* The state itself comes from the IOC dependent backend */
+	if (!phb->eeh_ops || !phb->eeh_ops->get_state) {
+		pr_warn("%s: Unsupported request\n",
+			__func__);
+		return -3;
+	}
+
+	state = phb->eeh_ops->get_state(pe);
+	in_reset = !!(state & EEH_STATE_RESET_ACTIVE);
+	dma_on = !!(state & EEH_STATE_DMA_ENABLED);
+	mmio_on = !!(state & EEH_STATE_MMIO_ENABLED);
+
+	if (in_reset) {
+		args->rets[1] = 1;
+	} else if (dma_on && mmio_on) {
+		args->rets[1] = 0;
+	} else if (!dma_on && !mmio_on) {
+		args->rets[1] = 2;
+	} else if (dma_on) {
+		/* DMA enabled, MMIO still stopped */
+		args->rets[1] = 4;
+	} else {
+		/* Remaining combination: MMIO enabled, DMA stopped */
+		args->rets[1] = 5;
+		args->rets[3] = 1000;
+	}
+
+	return 0;
+}
+
 /**
  * kvmppc_eeh_rtas - Backend for EEH RTAS emulation
  * @vcpu: KVM virtual CPU
@@ -282,6 +356,9 @@ void kvmppc_eeh_rtas(struct kvm_vcpu *vcpu, struct 
rtas_args *args, int op)
case eeh_rtas_set_slot_reset:
ret = kvmppc_eeh_set_reset(vcpu, args);
break;
+   case eeh_rtas_read_slot_reset_state2:
+   ret = kvmppc_eeh_get_state2(vcpu, args);
+   break;
default:
pr_warn("%s: Unsupported EEH RTAS service#%d\n",
__func__, op);
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/22] kvm: Address mapping for VFIO device

2014-05-04 Thread Gavin Shan
The address (domain/bus/slot/function) looks different from the
perspective of host and guest. We have to setup the mapping for
EEH and tear it down accordingly. The patch introduces additional
attributes to KVM VFIO device for address mapping or unmapping.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/kvm/Kconfig  |  1 +
 arch/powerpc/kvm/Makefile |  3 +++
 include/uapi/linux/kvm.h  | 10 
 virt/kvm/vfio.c   | 60 ++-
 4 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 743d2d9..6764fc5 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -64,6 +64,7 @@ config KVM_BOOK3S_64
select KVM_BOOK3S_64_HANDLER
select KVM
select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
+   select KVM_VFIO if VFIO
---help---
  Support running unmodified book3s_64 and book3s_32 guest kernels
  in virtual machines on book3s_64 host processors.
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index ce569b6..673038d 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -97,6 +97,9 @@ endif
 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
book3s_xics.o
 
+kvm-book3s_64-objs-$(CONFIG_KVM_VFIO) += \
+   $(addprefix ../../../virt/kvm/, vfio.o)
+
 kvm-book3s_64-module-objs += \
$(KVM)/kvm_main.o \
$(KVM)/eventfd.o \
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a8f4ee5..97b4d1e 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -932,9 +932,19 @@ struct kvm_device_attr {
 #define  KVM_DEV_VFIO_GROUP			1
 #define   KVM_DEV_VFIO_GROUP_ADD		1
 #define   KVM_DEV_VFIO_GROUP_DEL		2
+/* Attribute group for per-device operations on the KVM VFIO device */
+#define  KVM_DEV_VFIO_DEV			2
+/* Attributes of KVM_DEV_VFIO_DEV: set up / tear down the EEH address mapping */
+#define   KVM_DEV_VFIO_DEV_EEH_MAP		1
+#define   KVM_DEV_VFIO_DEV_EEH_UNMAP		2
 #define KVM_DEV_TYPE_ARM_VGIC_V2   5
 #define KVM_DEV_TYPE_FLIC  6
 
+/*
+ * Argument for the KVM_DEV_VFIO_DEV_EEH_MAP and
+ * KVM_DEV_VFIO_DEV_EEH_UNMAP attributes. It pairs the host address
+ * of a PCI device with the address the device has in the guest.
+ * The unmap path only consumes the host part (domain and bdn).
+ */
+struct kvm_vfio_pci_addr {
+   __u32 domain;   /* Host PHB domain  */
+   __u32 bdn;  /* Host bus/dev/func*/
+   __u64 gbuid;/* Guest PHB BUID   */
+   __u32 gbdn; /* Guest bus/dev/func   */
+};
+
 /*
  * ioctls for VM fds
  */
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index ba1a93f..778015d 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -28,6 +28,10 @@ struct kvm_vfio {
struct list_head group_list;
struct mutex lock;
bool noncoherent;
+#ifdef CONFIG_KVM_EEH
+   kvm_vfio_dev_eeh_map eeh_map;
+   kvm_vfio_dev_eeh_unmap eeh_unmap;
+#endif
 };
 
 static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
@@ -201,12 +205,53 @@ static int kvm_vfio_set_group(struct kvm_device *dev, 
long attr, u64 arg)
return -ENXIO;
 }
 
+/*
+ * Handle the attributes of the KVM_DEV_VFIO_DEV group.
+ *
+ * With CONFIG_KVM_EEH, the EEH map/unmap attributes copy a
+ * struct kvm_vfio_pci_addr from user space (@arg is its address) and
+ * pass it on to the registered eeh_map/eeh_unmap hook. A missing hook
+ * counts as success.
+ *
+ * Returns the hook's result, 0 when no hook is set, -EFAULT when the
+ * argument can't be copied, or -ENXIO for an unknown attribute.
+ */
+static int kvm_vfio_set_dev(struct kvm_device *dev, long attr, u64 arg)
+{
+	struct kvm_vfio *kv = dev->private;
+	struct kvm_vfio_pci_addr addr;
+
+	switch (attr) {
+#ifdef CONFIG_KVM_EEH
+	case KVM_DEV_VFIO_DEV_EEH_MAP:
+		if (copy_from_user(&addr, (void __user *)arg, sizeof(addr)))
+			return -EFAULT;
+
+		if (!kv->eeh_map)
+			return 0;
+
+		return kv->eeh_map(dev->kvm, addr.domain,
+				   addr.bdn, addr.gbuid, addr.gbdn);
+	case KVM_DEV_VFIO_DEV_EEH_UNMAP:
+		if (copy_from_user(&addr, (void __user *)arg, sizeof(addr)))
+			return -EFAULT;
+
+		if (!kv->eeh_unmap)
+			return 0;
+
+		/* Only the host side of the address is needed to unmap */
+		return kv->eeh_unmap(dev->kvm, addr.domain, addr.bdn);
+#endif
+	}
+
+	return -ENXIO;
+}
+
 static int kvm_vfio_set_attr(struct kvm_device *dev,
 struct kvm_device_attr *attr)
 {
switch (attr->group) {
case KVM_DEV_VFIO_GROUP:
return kvm_vfio_set_group(dev, attr->attr, attr->addr);
+   case KVM_DEV_VFIO_DEV:
+   return kvm_vfio_set_dev(dev, attr->attr, attr->addr);
}
 
return -ENXIO;
@@ -224,6 +269,16 @@ static int kvm_vfio_has_attr(struct kvm_device *dev,
}
 
break;
+   case KVM_DEV_VFIO_DEV:
+   switch (attr->attr) {
+#ifdef CONFIG_KVM_EEH
+   case KVM_DEV_VFIO_DEV_EEH_MAP:
+   case KVM_DEV_VFIO_DEV_EEH_UNMAP:
+   return 0;
+#endif
+   }
+
+   break;
}
 
return -ENXIO;
@@ -262,7 +317,10 @@ static int kvm_vfio_create(struct kvm_devic

[PATCH 17/22] powerpc/kvm: Connect EEH RTAS emulation backend

2014-05-04 Thread Gavin Shan
The patch intends to connect the KVM module with the backend for
EEH RTAS emulation. In turn, we can handle the EEH RTAS services
from the guest.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/include/asm/kvm_ppc.h |  7 +++
 arch/powerpc/kvm/book3s_rtas.c | 40 ++
 2 files changed, 47 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 4096f16..18b51a1 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -29,6 +29,9 @@
 #include 
 #include 
 #include 
+#ifdef CONFIG_KVM_EEH
+#include 
+#endif
 #ifdef CONFIG_PPC_BOOK3S
 #include 
 #else
@@ -166,6 +169,10 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct 
kvm_interrupt *irq);
 extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
 extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
 extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+#ifdef CONFIG_KVM_EEH
+extern void kvmppc_eeh_rtas(struct kvm_vcpu *vcpu,
+   struct rtas_args *args, int flag);
+#endif
 extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server,
u32 priority);
 extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 7a05315..17bdb4a 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -16,6 +16,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #ifdef CONFIG_KVM_XICS
 static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
@@ -103,6 +105,24 @@ out:
 }
 #endif /* CONFIG_KVM_XICS */
 
+#ifdef CONFIG_KVM_EEH
+
+/*
+ * Generate a static RTAS handler named kvm_rtas_eeh_<name>. It has
+ * the (vcpu, args) signature and does nothing but forward the
+ * request to kvmppc_eeh_rtas() with @flag as the operation code.
+ */
+#define KVM_RTAS_EEH_FUNC(name, flag)  \
+static void kvm_rtas_eeh_##name(struct kvm_vcpu *vcpu, \
+   struct rtas_args *args) \
+{  \
+   kvmppc_eeh_rtas(vcpu, args, flag);  \
+}
+
+/* One forwarding handler per emulated EEH RTAS service */
+KVM_RTAS_EEH_FUNC(set_option,  eeh_rtas_set_option)
+KVM_RTAS_EEH_FUNC(set_reset,   eeh_rtas_set_slot_reset)
+KVM_RTAS_EEH_FUNC(read_state2, eeh_rtas_read_slot_reset_state2)
+KVM_RTAS_EEH_FUNC(addr_info2,  eeh_rtas_get_config_addr_info2)
+KVM_RTAS_EEH_FUNC(error_detail,eeh_rtas_slot_error_detail)
+KVM_RTAS_EEH_FUNC(configure_pe,eeh_rtas_configure_pe)
+
+#endif /* CONFIG_KVM_EEH */
+
 struct rtas_handler {
void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
char *name;
@@ -115,6 +135,26 @@ static struct rtas_handler rtas_handlers[] = {
{ .name = "ibm,int-off",  .handler = kvm_rtas_int_off },
{ .name = "ibm,int-on",   .handler = kvm_rtas_int_on },
 #endif
+#ifdef CONFIG_KVM_EEH
+   { .name = "ibm,set-eeh-option",
+ .handler = kvm_rtas_eeh_set_option
+   },
+   { .name = "ibm,set-slot-reset",
+ .handler = kvm_rtas_eeh_set_reset
+   },
+   { .name = "ibm,read-slot-reset-state2",
+ .handler = kvm_rtas_eeh_read_state2
+   },
+   { .name = "ibm,get-config-addr-info2",
+ .handler = kvm_rtas_eeh_addr_info2
+   },
+   { .name = "ibm,slot-error-detail",
+ .handler = kvm_rtas_eeh_error_detail
+   },
+   { .name = "ibm,configure-pe",
+ .handler = kvm_rtas_eeh_configure_pe
+   }
+#endif /* CONFIG_KVM_EEH */
 };
 
 struct rtas_token_definition {
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 21/22] powerpc/powernv: Sync OPAL header file with firmware

2014-05-04 Thread Gavin Shan
The patch synchronizes OPAL header file with firmware so that the
host kernel can make OPAL call to do error injection.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/include/asm/opal.h| 65 ++
 arch/powerpc/platforms/powernv/opal-wrappers.S |  1 +
 2 files changed, 66 insertions(+)

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 66ad7a7..ca55d9c 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -175,6 +175,7 @@ extern int opal_enter_rtas(struct rtas_args *args,
 #define OPAL_SET_PARAM 90
 #define OPAL_DUMP_RESEND   91
 #define OPAL_DUMP_INFO2				94
+/* OPAL call token used by the opal_err_injct() wrapper */
+#define OPAL_ERR_INJECT				96
 
 #ifndef __ASSEMBLY__
 
@@ -219,6 +220,69 @@ enum OpalPciErrorSeverity {
OPAL_EEH_SEV_INF= 5
 };
 
+/*
+ * Constants for OPAL error injection. Two separate numbering spaces
+ * share this enum, which is why values repeat:
+ *
+ *  - OpalErrinjctType*: the kind of error to inject, stored in the
+ *    "type" field of struct OpalErrinjct.
+ *  - OpalEjtIoa*: operations specific to the IOA bus error types.
+ *    NOTE(review): presumably these are the values carried in the
+ *    "function" field of struct OpalErrinjct - confirm against the
+ *    firmware interface.
+ */
+enum OpalErrinjctType {
+   OpalErrinjctTypeFirst   = 0,
+   OpalErrinjctTypeFatal   = 1,
+   OpalErrinjctTypeRecoverRandomEvent  = 2,
+   OpalErrinjctTypeRecoverSpecialEvent = 3,
+   OpalErrinjctTypeCorruptedPage   = 4,
+   OpalErrinjctTypeCorruptedSlb= 5,
+   OpalErrinjctTypeTranslatorFailure   = 6,
+   OpalErrinjctTypeIoaBusError = 7,
+   OpalErrinjctTypeIoaBusError64   = 8,
+   OpalErrinjctTypePlatformSpecific= 9,
+   OpalErrinjctTypeDcacheStart = 10,
+   OpalErrinjctTypeDcacheEnd   = 11,
+   OpalErrinjctTypeIcacheStart = 12,
+   OpalErrinjctTypeIcacheEnd   = 13,
+   OpalErrinjctTypeTlbStart= 14,
+   OpalErrinjctTypeTlbEnd  = 15,
+   OpalErrinjctTypeUpstreamIoError = 16,
+   OpalErrinjctTypeLast= 17,
+
+   /* Operations for IoaBusError & IoaBusError64 (numbering restarts at 0) */
+   OpalEjtIoaLoadMemAddr   = 0,
+   OpalEjtIoaLoadMemData   = 1,
+   OpalEjtIoaLoadIoAddr= 2,
+   OpalEjtIoaLoadIoData= 3,
+   OpalEjtIoaLoadConfigAddr= 4,
+   OpalEjtIoaLoadConfigData= 5,
+   OpalEjtIoaStoreMemAddr  = 6,
+   OpalEjtIoaStoreMemData  = 7,
+   OpalEjtIoaStoreIoAddr   = 8,
+   OpalEjtIoaStoreIoData   = 9,
+   OpalEjtIoaStoreConfigAddr   = 10,
+   OpalEjtIoaStoreConfigData   = 11,
+   OpalEjtIoaDmaReadMemAddr= 12,
+   OpalEjtIoaDmaReadMemData= 13,
+   OpalEjtIoaDmaReadMemMaster  = 14,
+   OpalEjtIoaDmaReadMemTarget  = 15,
+   OpalEjtIoaDmaWriteMemAddr   = 16,
+   OpalEjtIoaDmaWriteMemData   = 17,
+   OpalEjtIoaDmaWriteMemMaster = 18,
+   OpalEjtIoaDmaWriteMemTarget = 19,
+};
+
+/*
+ * Argument block handed to opal_err_injct(). "type" selects the kind
+ * of error and thereby which member of the anonymous union carries
+ * the parameters: "ioa" uses a 32-bit address and mask, "ioa64" the
+ * 64-bit counterparts.
+ */
+struct OpalErrinjct {
+   int32_t type;
+   union {
+   struct {
+   uint32_t addr;
+   uint32_t mask;
+   uint64_t phb_id;
+   uint32_t pe;
+   uint32_t function;
+   }ioa;
+   struct {
+   uint64_t addr;
+   uint64_t mask;
+   uint64_t phb_id;
+   uint32_t pe;
+   uint32_t function;
+   }ioa64;
+   };
+};
+
 enum OpalShpcAction {
OPAL_SHPC_GET_LINK_STATE = 0,
OPAL_SHPC_GET_SLOT_STATE = 1
@@ -839,6 +903,7 @@ int64_t opal_pci_get_phb_diag_data(uint64_t phb_id, void 
*diag_buffer,
   uint64_t diag_buffer_len);
 int64_t opal_pci_get_phb_diag_data2(uint64_t phb_id, void *diag_buffer,
uint64_t diag_buffer_len);
+int64_t opal_err_injct(void *data);
 int64_t opal_pci_fence_phb(uint64_t phb_id);
 int64_t opal_pci_reinit(uint64_t phb_id, uint64_t reinit_scope, uint64_t data);
 int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t 
error_type, uint8_t mask_action);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index f531ffe..46265de 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -119,6 +119,7 @@ OPAL_CALL(opal_pci_next_error,  
OPAL_PCI_NEXT_ERROR);
 OPAL_CALL(opal_pci_poll,   OPAL_PCI_POLL);
 OPAL_CALL(opal_pci_msi_eoi,OPAL_PCI_MSI_EOI);
 OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2);
+OPAL_CALL(opal_err_injct,  OPAL_ERR_INJECT);
 OPAL_CALL(opal_xscom_read, OPAL_XSCOM_READ);
 OPAL_CALL(opal_xscom_w

[PATCH 03/22] powerpc/eeh: Search EEH device by guest address

2014-05-04 Thread Gavin Shan
The patch introduces function eeh_vfio_dev_get() to search the EEH
device according to its guest address, which is made up of VM indicator,
PHB BUID, bus, slot and function number. The function is useful in the
backends for EEH RTAS emulation.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/include/asm/eeh.h |  6 ++
 arch/powerpc/kernel/eeh_pe.c   | 45 ++
 2 files changed, 51 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 8bfb167..b12e3e9 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -384,6 +384,12 @@ static inline void eeh_remove_device(struct pci_dev *dev) 
{ }
 #define EEH_IO_ERROR_VALUE(size) (-1UL)
 #endif /* CONFIG_EEH */
 
+
+#ifdef CONFIG_KVM_EEH
+struct eeh_dev *eeh_vfio_dev_get(struct eeh_vfio_pci_addr *addr);
+
+#endif /* CONFIG_KVM_EEH */
+
 #ifdef CONFIG_PPC64
 /*
  * MMIO read/write operations with EEH support.
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index fbd01eb..dba7c82 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -248,6 +248,51 @@ struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
return pe;
 }
 
+#ifdef CONFIG_KVM_EEH
+/*
+ * Per-PE callback used by eeh_vfio_dev_get(). @data is the PE to scan
+ * and @flag the guest address (struct eeh_vfio_pci_addr) to look for.
+ *
+ * Returns the first passed EEH device of the PE whose guest address
+ * matches, or NULL if the PE has none.
+ */
+static void *__eeh_vfio_dev_get(void *data, void *flag)
+{
+	struct eeh_vfio_pci_addr *want = flag;
+	struct eeh_pe *pe = data;
+	struct eeh_dev *cur, *next;
+
+	eeh_pe_for_each_dev(pe, cur, next) {
+		/*
+		 * Only devices owned by a guest qualify. They match on
+		 * VM, PHB BUID, bus and devfn.
+		 */
+		if (eeh_dev_passed(cur) &&
+		    cur->gaddr.kvm == want->kvm &&
+		    cur->gaddr.buid_hi == want->buid_hi &&
+		    cur->gaddr.buid_lo == want->buid_lo &&
+		    cur->gaddr.bus == want->bus &&
+		    cur->gaddr.devfn == want->devfn)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/**
+ * eeh_vfio_dev_get - Look up an EEH device by its guest address
+ * @addr: guest address of the EEH device
+ *
+ * Walk the PE hierarchy below every PHB PE and return the passed
+ * EEH device that carries the given guest address (VM, PHB BUID,
+ * bus and devfn). Return NULL if no such device exists.
+ */
+struct eeh_dev *eeh_vfio_dev_get(struct eeh_vfio_pci_addr *addr)
+{
+	struct eeh_dev *found = NULL;
+	struct eeh_pe *phb_pe;
+
+	list_for_each_entry(phb_pe, &eeh_phb_pe, child) {
+		found = eeh_pe_traverse(phb_pe, __eeh_vfio_dev_get, addr);
+		if (found)
+			break;
+	}
+
+	return found;
+}
+#endif /* CONFIG_KVM_EEH */
+
 /**
  * eeh_pe_get_parent - Retrieve the parent PE
  * @edev: EEH device
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 19/22] powerpc: Introduce CONFIG_KVM_ERRINJCT

2014-05-04 Thread Gavin Shan
The patch introduces kernel configuration option KVM_ERRINJCT. It
enables emulating error injection RTAS services used on IBM POWER
(pSeries) servers.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/kvm/Kconfig | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 6764fc5..914ab05 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -198,6 +198,14 @@ config KVM_EEH
  Enable support for emulating EEH RTAS services used on IBM
  POWER (pSeries) servers.
 
+config KVM_ERRINJCT
+   bool "KVM in-kernel error injection emulation"
+   depends on KVM_EEH
+   default y
+   ---help---
+ Enable support for emulating error injection services used
+ on IBM POWER (pSeries) servers
+
 source drivers/vhost/Kconfig
 
 endif # VIRTUALIZATION
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 15/22] powerpc/eeh: Emulate RTAS call ibm,slot-error-detail

2014-05-04 Thread Gavin Shan
The RTAS call "ibm,slot-error-detail" is being used to retrieve the
error log (either permanent or temporary) from the underlying firmware.
The patch implements the backend to emulate the RTAS call.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/platforms/powernv/eeh-rtas.c | 75 +++
 1 file changed, 75 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-rtas.c 
b/arch/powerpc/platforms/powernv/eeh-rtas.c
index 4a9c2c7..8934564 100644
--- a/arch/powerpc/platforms/powernv/eeh-rtas.c
+++ b/arch/powerpc/platforms/powernv/eeh-rtas.c
@@ -390,6 +390,78 @@ out:
return ret;
 }
 
+/*
+ * Emulate the RTAS call "ibm,slot-error-detail".
+ *
+ * The call must come with 8 inputs and 1 output. args[5] is the guest
+ * address of the log buffer, args[6] its length and args[7] the log
+ * type (1 or 2), which is handed to the backend as severity. The log
+ * is collected into a temporary kernel buffer through the backend's
+ * get_log() and then copied into guest memory.
+ *
+ * Returns 0 on success and 1 on any failure: bad parameters, an
+ * address that can't be resolved, EEH disabled on the PHB, no backend,
+ * an unusable buffer length, out of memory, or a failed copy to the
+ * guest.
+ */
+static int kvmppc_eeh_get_error(struct kvm_vcpu *vcpu,
+				struct rtas_args *args)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	struct eeh_dev *edev;
+	struct eeh_pe *pe;
+	struct eeh_vfio_pci_addr addr;
+	char *log;
+	gpa_t guest_log;
+	int len, severity;
+	int ret = 0;
+
+	/* Sanity check on parameter */
+	if (args->nargs != 8 || args->nret != 1) {
+		pr_warn("%s: Non-matched arguments (%d, %d) - (8, 1)\n",
+			__func__, args->nargs, args->nret);
+		ret = 1;
+		goto out;
+	} else if (args->args[7] != 1 && args->args[7] != 2) {
+		pr_warn("%s: Invalid Log type\n", __func__);
+		ret = 1;
+		goto out;
+	}
+
+	/* Figure out the address */
+	if (kvmppc_eeh_format_addr(vcpu, args, &addr, false, &edev, &pe)) {
+		ret = 1;
+		goto out;
+	}
+
+	/* Make sure that the EEH stuff has been initialized */
+	hose = pe->phb;
+	phb = hose->private_data;
+	if (!(phb->flags & PNV_PHB_FLAG_EEH)) {
+		pr_warn("%s: EEH disabled on PHB#%d\n",
+			__func__, hose->global_number);
+		ret = 1;
+		goto out;
+	}
+
+	/*
+	 * Retrieve error log from PE. We don't have cached error
+	 * log for one specific PE yet, which need to be figured
+	 * out later.
+	 */
+	if (phb->eeh_ops && phb->eeh_ops->get_log) {
+		/*
+		 * Keep the guest address in gpa_t rather than int. Assuming
+		 * RTAS arguments are unsigned 32-bit values (TODO confirm),
+		 * an address above 2GB would otherwise turn negative and be
+		 * sign-extended when passed to kvm_write_guest().
+		 */
+		guest_log = args->args[5];
+		len = args->args[6];
+		severity = args->args[7];
+
+		/*
+		 * The length is controlled by the guest. Refuse empty,
+		 * negative or oversized requests up front instead of
+		 * handing them to the allocator and the backend.
+		 */
+		if (len <= 0 || (size_t)len > KMALLOC_MAX_SIZE) {
+			pr_warn("%s: Invalid log buffer length %d\n",
+				__func__, len);
+			ret = 1;
+			goto out;
+		}
+
+		/* Zeroed buffer, so no stale kernel data can reach the guest */
+		log = kzalloc(len, GFP_KERNEL);
+		if (!log) {
+			pr_err("%s: Out of memory!\n", __func__);
+			ret = 1;
+			goto out;
+		}
+
+		phb->eeh_ops->get_log(pe, severity, log, len);
+		if (kvm_write_guest(vcpu->kvm, guest_log, log, len)) {
+			pr_warn("%s: Fail pushing log to guest\n",
+				__func__);
+			ret = 1;
+		}
+
+		kfree(log);
+	} else {
+		ret = 1;
+	}
+out:
+	return ret;
+}
+
 /**
  * kvmppc_eeh_rtas - Backend for EEH RTAS emulation
  * @vcpu: KVM virtual CPU
@@ -418,6 +490,9 @@ void kvmppc_eeh_rtas(struct kvm_vcpu *vcpu, struct 
rtas_args *args, int op)
case eeh_rtas_get_config_addr_info2:
ret = kvmppc_eeh_get_addr2(vcpu, args);
break;
+   case eeh_rtas_slot_error_detail:
+   ret = kvmppc_eeh_get_error(vcpu, args);
+   break;
default:
pr_warn("%s: Unsupported EEH RTAS service#%d\n",
__func__, op);
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/22] powerpc/eeh: Info to trace passed devices

2014-05-04 Thread Gavin Shan
The address of passed PCI devices (domain:bus:slot:func) might be
quite different from the perspective of host and guest. We have to
trace the address mapping so that we can emulate EEH RTAS requests
from guest. The patch introduces additional fields to eeh_pe and
eeh_dev for the purpose.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/include/asm/eeh.h | 49 ++
 1 file changed, 49 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 7782056..8bfb167 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -48,6 +48,17 @@ struct device_node;
 #define EEH_PE_RST_HOLD_TIME   250
 #define EEH_PE_RST_SETTLE_TIME 1800
 
+#ifdef CONFIG_KVM_EEH
+/*
+ * Address of a passed PCI device or PE as seen by the guest.
+ * Passed devices are matched on kvm, BUID, bus and devfn.
+ */
+struct eeh_vfio_pci_addr {
+	struct kvm	*kvm;		/* KVM identifier		*/
+	unsigned int	buid_hi;	/* PHB BUID high		*/
+	unsigned int	buid_lo;	/* PHB BUID low			*/
+	unsigned char	bus;		/* Bus number			*/
+	unsigned char	devfn;		/* Slot and function		*/
+	int		pe_addr;	/* PE configuration address	*/
+};
+#endif /* CONFIG_KVM_EEH */
+
 /*
  * The struct is used to trace PE related EEH functionality.
  * In theory, there will have one instance of the struct to
@@ -72,6 +83,7 @@ struct device_node;
 #define EEH_PE_RESET   (1 << 2)/* PE reset in progress */
 
 #define EEH_PE_KEEP		(1 << 8)	/* Keep PE on hotplug	*/
+#define EEH_PE_PASSTHROUGH	(1 << 9)	/* PE owned by guest	*/
 
 struct eeh_pe {
int type;   /* PE type: PHB/Bus/Device  */
@@ -85,6 +97,9 @@ struct eeh_pe {
struct timeval tstamp;  /* Time on first-time freeze*/
int false_positives;/* Times of reported #ff's  */
struct eeh_pe *parent;  /* Parent PE*/
+#ifdef CONFIG_KVM_EEH
+   struct eeh_vfio_pci_addr gaddr; /* Associated KVM guest address */
+#endif
struct list_head child_list;/* Link PE to the child list*/
struct list_head edevs; /* Link list of EEH devices */
struct list_head child; /* Child PEs*/
@@ -93,6 +108,21 @@ struct eeh_pe {
 #define eeh_pe_for_each_dev(pe, edev, tmp) \
list_for_each_entry_safe(edev, tmp, &pe->edevs, list)
 
+/* Tell whether @pe is owned by a guest; a NULL PE never is. */
+static inline bool eeh_pe_passed(struct eeh_pe *pe)
+{
+	if (!pe)
+		return false;
+
+	return (pe->state & EEH_PE_PASSTHROUGH) != 0;
+}
+
+/* Mark (@passed true) or unmark @pe as owned by a guest; NULL is ignored. */
+static inline void eeh_pe_set_passed(struct eeh_pe *pe, bool passed)
+{
+	if (!pe)
+		return;
+
+	if (!passed) {
+		pe->state &= ~EEH_PE_PASSTHROUGH;
+		return;
+	}
+
+	pe->state |= EEH_PE_PASSTHROUGH;
+}
+
 /*
  * The struct is used to trace EEH state for the associated
  * PCI device node or PCI device. In future, it might
@@ -110,6 +140,7 @@ struct eeh_pe {
 #define EEH_DEV_SYSFS  (1 << 9)/* Sysfs created*/
 #define EEH_DEV_REMOVED		(1 << 10)	/* Removed permanently	*/
 #define EEH_DEV_FRESET		(1 << 11)	/* Fundamental reset	*/
+#define EEH_DEV_PASSTHROUGH	(1 << 12)	/* Owned by guest	*/
 
 struct eeh_dev {
int mode;   /* EEH mode */
@@ -126,6 +157,9 @@ struct eeh_dev {
struct device_node *dn; /* Associated device node   */
struct pci_dev *pdev;   /* Associated PCI device*/
struct pci_bus *bus;/* PCI bus for partial hotplug  */
+#ifdef CONFIG_KVM_EEH
+   struct eeh_vfio_pci_addr gaddr; /* Address in guest */
+#endif
 };
 
 static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev)
@@ -138,6 +172,21 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct 
eeh_dev *edev)
return edev ? edev->pdev : NULL;
 }
 
+/* Tell whether @dev is owned by a guest; a NULL device never is. */
+static inline bool eeh_dev_passed(struct eeh_dev *dev)
+{
+	if (!dev)
+		return false;
+
+	return (dev->mode & EEH_DEV_PASSTHROUGH) != 0;
+}
+
+/* Mark (@passed true) or unmark @dev as owned by a guest; NULL is ignored. */
+static inline void eeh_dev_set_passed(struct eeh_dev *dev, bool passed)
+{
+	if (!dev)
+		return;
+
+	if (!passed) {
+		dev->mode &= ~EEH_DEV_PASSTHROUGH;
+		return;
+	}
+
+	dev->mode |= EEH_DEV_PASSTHROUGH;
+}
+
 /* Return values from eeh_ops::next_error */
 enum {
EEH_NEXT_ERR_NONE = 0,
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 12/22] powerpc/eeh: Emulate RTAS call ibm,set-slot-reset

2014-05-04 Thread Gavin Shan
The RTAS call "ibm,set-slot-reset" is used to reset one particular
PE, with either a fundamental or a hot reset. The patch implements
the backend to emulate the RTAS call.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/platforms/powernv/eeh-rtas.c | 92 +++
 1 file changed, 92 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-rtas.c 
b/arch/powerpc/platforms/powernv/eeh-rtas.c
index 1a037fd..3e38d13 100644
--- a/arch/powerpc/platforms/powernv/eeh-rtas.c
+++ b/arch/powerpc/platforms/powernv/eeh-rtas.c
@@ -171,6 +171,95 @@ out:
return ret;
 }
 
+/*
+ * Emulate the RTAS call "ibm,set-slot-reset".
+ *
+ * The call must come with 4 inputs and 1 output; args[3] is the reset
+ * opcode (deactivate, hot or fundamental). The reset itself is done by
+ * the PHB's EEH backend. On deactivate, the PHB error is cleared first
+ * and the PE is thawed (MMIO, then DMA) afterwards.
+ *
+ * Returns 0 on success, -3 for bad parameters or an address that can't
+ * be resolved, -7 when EEH is disabled or the backend is incomplete,
+ * and -1 when the backend fails to reset.
+ */
+static int kvmppc_eeh_set_reset(struct kvm_vcpu *vcpu,
+				struct rtas_args *args)
+{
+	struct eeh_vfio_pci_addr addr;
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	struct eeh_dev *edev;
+	struct eeh_pe *pe;
+	bool deassert, known;
+	int opcode;
+
+	/* 4 inputs, 1 output - anything else is malformed */
+	if (args->nargs != 4 || args->nret != 1) {
+		pr_warn("%s: Non-matched arguments (%d, %d) - (4, 1)\n",
+			__func__, args->nargs, args->nret);
+		return -3;
+	}
+
+	/* Only the three known reset opcodes are accepted */
+	opcode = args->args[3];
+	deassert = (opcode == EEH_RESET_DEACTIVATE);
+	known = deassert ||
+		opcode == EEH_RESET_HOT ||
+		opcode == EEH_RESET_FUNDAMENTAL;
+	if (!known) {
+		pr_warn("%s: Unsupported opcode %d\n",
+			__func__, opcode);
+		return -3;
+	}
+
+	/* Resolve the address. We always have the PE address */
+	if (kvmppc_eeh_format_addr(vcpu, args, &addr, false, &edev, &pe))
+		return -3;
+
+	/* The PHB must have EEH up and running */
+	hose = pe->phb;
+	phb = hose->private_data;
+	if (!(phb->flags & PNV_PHB_FLAG_EEH)) {
+		pr_warn("%s: EEH disable on PHB#%d\n",
+			__func__, hose->global_number);
+		return -7;
+	}
+
+	/* The IODA dependent backend has to provide both hooks */
+	if (!phb->eeh_ops ||
+	    !phb->eeh_ops->set_option ||
+	    !phb->eeh_ops->reset) {
+		pr_warn("%s: Unsupported request\n", __func__);
+		return -7;
+	}
+
+	/*
+	 * The PE may have been frozen through PAPR error injection.
+	 * The spec describes that as one-shot without a "sticky" bit,
+	 * but that does not hold in practice, at least on P7IOC.
+	 * Clear the PHB error here to avoid a recursive error that
+	 * would make the recovery fail.
+	 */
+	if (deassert)
+		opal_pci_reset(phb->opal_id,
+			       OPAL_PHB_ERROR,
+			       OPAL_ASSERT_RESET);
+
+	if (phb->eeh_ops->reset(pe, opcode)) {
+		pr_warn("%s: Failure from backend\n",
+			__func__);
+		return -1;
+	}
+
+	/*
+	 * The PE is still frozen at this point. Thawing it only after
+	 * the deassert keeps stray IO away during the reset, which
+	 * could otherwise freeze the PE again.
+	 */
+	if (deassert) {
+		phb->eeh_ops->set_option(pe, EEH_OPT_THAW_MMIO);
+		phb->eeh_ops->set_option(pe, EEH_OPT_THAW_DMA);
+	}
+
+	return 0;
+}
+
 /**
  * kvmppc_eeh_rtas - Backend for EEH RTAS emulation
  * @vcpu: KVM virtual CPU
@@ -190,6 +279,9 @@ void kvmppc_eeh_rtas(struct kvm_vcpu *vcpu, struct 
rtas_args *args, int op)
case eeh_rtas_set_option:
ret = kvmppc_eeh_set_option(vcpu, args);
break;
+   case eeh_rtas_set_slot_reset:
+   ret = kvmppc_eeh_set_reset(vcpu, args);
+   break;
default:
pr_warn("%s: Unsupported EEH RTAS service#%d\n",
__func__, op);
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 09/22] powerpc/powernv: EEH RTAS emulation backend

2014-05-04 Thread Gavin Shan
The implementation of EEH RTAS emulation is split up into 2 layers:
the KVM layer and the powernv platform layer. The KVM layer simply
dispatches RTAS requests from the guest to the powernv platform layer.
The powernv platform layer then takes care of the details: it processes
the request and returns the result to the KVM layer.

The patch implements the infrastructure of powernv platform layer
for EEH RTAS emulation.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/include/asm/eeh.h| 18 +
 arch/powerpc/platforms/powernv/Makefile   |  1 +
 arch/powerpc/platforms/powernv/eeh-rtas.c | 64 +++
 3 files changed, 83 insertions(+)
 create mode 100644 arch/powerpc/platforms/powernv/eeh-rtas.c

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 677c719..7384dee 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -49,6 +49,24 @@ struct device_node;
 #define EEH_PE_RST_SETTLE_TIME 1800
 
 #ifdef CONFIG_KVM_EEH
+
+/*
+ * Those EEH RTAS operations are going to be emulated.
+ * According to PAPR specification, there're much more
+ * operations. However, the following RTAS operations
+ * are enough for EEH in guest to work properly.
+ */
+enum {
+   eeh_rtas_first  = 0,
+   eeh_rtas_set_option = 0,
+   eeh_rtas_set_slot_reset = 1,
+   eeh_rtas_read_slot_reset_state2 = 2,
+   eeh_rtas_get_config_addr_info2  = 3,
+   eeh_rtas_slot_error_detail  = 4,
+   eeh_rtas_configure_pe   = 5,
+   eeh_rtas_last   = 5
+};
+
 struct eeh_vfio_pci_addr {
struct kvm  *kvm;   /* KVM identifier   */
unsigned intbuid_hi;/* PHB BUID high*/
diff --git a/arch/powerpc/platforms/powernv/Makefile 
b/arch/powerpc/platforms/powernv/Makefile
index 63cebb9..d8ea670 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -6,5 +6,6 @@ obj-y   += opal-msglog.o
 obj-$(CONFIG_SMP)  += smp.o
 obj-$(CONFIG_PCI)  += pci.o pci-p5ioc2.o pci-ioda.o
 obj-$(CONFIG_EEH)  += eeh-ioda.o eeh-powernv.o
+obj-$(CONFIG_KVM_EEH)  += eeh-rtas.o
 obj-$(CONFIG_PPC_SCOM) += opal-xscom.o
 obj-$(CONFIG_MEMORY_FAILURE)   += opal-memory-errors.o
diff --git a/arch/powerpc/platforms/powernv/eeh-rtas.c 
b/arch/powerpc/platforms/powernv/eeh-rtas.c
new file mode 100644
index 000..fded461
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/eeh-rtas.c
@@ -0,0 +1,64 @@
+/*
+ * The file intends to implement emulation for EEH related RTAS services,
+ * which is expected to be done inside hypervisor. The specific RTAS
+ * service is identified by its unique token. Currently, the tokens
+ * are assigned by QEMU in a dynamic way and the dedicated hcall (0xf000)
+ * was introduced for the purpose of RTAS emulation either in hypervisor
+ * or QEMU.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2014.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "powernv.h"
+#include "pci.h"
+
+/**
+ * kvmppc_eeh_rtas - Backend for EEH RTAS emulation
+ * @vcpu: KVM virtual CPU
+ * @args: RTAS parameter
+ * @op: identifier of the specific EEH RTAS service
+ *
+ * The function will be called when the hypervisor receives emulation
+ * request on EEH RTAS from guest. Accordingly, it will dispatch to
+ * specific functions to handle the request.
+ */
+void kvmppc_eeh_rtas(struct kvm_vcpu *vcpu, struct rtas_args *args, int op)
+{
+   int ret = -3;
+
+   /* Parse the requested service */
+   switch (op) {
+   default:
+   pr_warn("%s: Unsupported EEH RTAS service#%d\n",
+   __func__, op);
+   }
+
+   args->rets[0] = ret;
+}
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 04/22] powerpc/eeh: Search EEH PE by guest address

2014-05-04 Thread Gavin Shan
The patch introduces function eeh_vfio_pe_get() to search the EEH
PE according to its guest address, which is made up of KVM indicator,
PHB ID and PE configuration address. The function will be useful in
backends for EEH RTAS emulation.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/include/asm/eeh.h |  1 +
 arch/powerpc/kernel/eeh_pe.c   | 40 
 2 files changed, 41 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index b12e3e9..3807167 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -387,6 +387,7 @@ static inline void eeh_remove_device(struct pci_dev *dev) { 
}
 
 #ifdef CONFIG_KVM_EEH
 struct eeh_dev *eeh_vfio_dev_get(struct eeh_vfio_pci_addr *addr);
+struct eeh_pe *eeh_vfio_pe_get(struct eeh_vfio_pci_addr *addr);
 
 #endif /* CONFIG_KVM_EEH */
 
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index dba7c82..1bd7b1f 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -249,6 +249,46 @@ struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
 }
 
 #ifdef CONFIG_KVM_EEH
+static void *__eeh_vfio_pe_get(void *data, void *flag)
+{
+   struct eeh_pe *pe = (struct eeh_pe *)data;
+   struct eeh_vfio_pci_addr *addr = (struct eeh_vfio_pci_addr *)flag;
+
+   if (!eeh_pe_passed(pe))
+   return NULL;
+
+   /* Comparing the address */
+   if (addr->kvm == pe->gaddr.kvm &&
+   addr->buid_hi == pe->gaddr.buid_hi &&
+   addr->buid_lo == pe->gaddr.buid_lo &&
+   addr->pe_addr == pe->gaddr.pe_addr)
+   return pe;
+
+   return NULL;
+}
+
+/**
+ * eeh_vfio_pe_get - Search EEH PE based on guest's address
+ * @addr: EEH PE guest address
+ *
+ * Search the EEH PE according to the guest address, which
+ * is made up of VM indicator, PHB BUID, and PE configuration
+ * address.
+ */
+struct eeh_pe *eeh_vfio_pe_get(struct eeh_vfio_pci_addr *addr)
+{
+   struct eeh_pe *root;
+   struct eeh_pe *pe;
+
+   list_for_each_entry(root, &eeh_phb_pe, child) {
+   pe = eeh_pe_traverse(root, __eeh_vfio_pe_get, addr);
+   if (pe)
+   return pe;
+   }
+
+   return NULL;
+}
+
 static void *__eeh_vfio_dev_get(void *data, void *flag)
 {
struct eeh_pe *pe = (struct eeh_pe *)data;
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 16/22] powerpc/eeh: Emulate RTAS call ibm,configure-pe

2014-05-04 Thread Gavin Shan
The RTAS call "ibm,configure-pe" is used to restore everything
after PE reset. The patch implements the backend to emulate the
RTAS call. In it, we restore the BARs of the affected PCI device
on the host side because the guest might not have full access to
the config space.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/platforms/powernv/eeh-rtas.c | 49 +++
 1 file changed, 49 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-rtas.c 
b/arch/powerpc/platforms/powernv/eeh-rtas.c
index 8934564..a663cd8 100644
--- a/arch/powerpc/platforms/powernv/eeh-rtas.c
+++ b/arch/powerpc/platforms/powernv/eeh-rtas.c
@@ -462,6 +462,52 @@ out:
return ret;
 }
 
+static int kvmppc_eeh_configure_pe(struct kvm_vcpu *vcpu,
+  struct rtas_args *args)
+{
+   struct pci_controller *hose;
+   struct pnv_phb *phb;
+   struct eeh_dev *edev;
+   struct eeh_pe *pe;
+   struct eeh_vfio_pci_addr addr;
+   int ret = 0;
+
+   /* Sanity check on parameter */
+   if (args->nargs != 3 || args->nret != 1) {
+   pr_warn("%s: Non-matched arguments (%d, %d) - (3, 1)\n",
+   __func__, args->nargs, args->nret);
+   ret = -3;
+   goto out;
+   }
+
+   /* Figure out the address */
+   if (kvmppc_eeh_format_addr(vcpu, args, &addr, false, &edev, &pe)) {
+   ret = -3;
+   goto out;
+   }
+
+   /* Make sure that the EEH stuff has been initialized */
+   hose = pe->phb;
+   phb = hose->private_data;
+   if (!(phb->flags & PNV_PHB_FLAG_EEH)) {
+   pr_warn("%s: EEH disabled on PHB#%x\n",
+   __func__, hose->global_number);
+   ret = -3;
+   goto out;
+   }
+
+   /*
+* The access to PCI config space on VFIO device has some
+* limitations. Part of PCI config space, including BAR
+* registers are not readable and writable. So the guest
+* should have stale values for those registers and we have
+* to restore them in host side.
+*/
+   eeh_pe_restore_bars(pe);
+out:
+   return ret;
+}
+
 /**
  * kvmppc_eeh_rtas - Backend for EEH RTAS emulation
  * @vcpu: KVM virtual CPU
@@ -493,6 +539,9 @@ void kvmppc_eeh_rtas(struct kvm_vcpu *vcpu, struct 
rtas_args *args, int op)
case eeh_rtas_slot_error_detail:
ret = kvmppc_eeh_get_error(vcpu, args);
break;
+   case eeh_rtas_configure_pe:
+   ret = kvmppc_eeh_configure_pe(vcpu, args);
+   break;
default:
pr_warn("%s: Unsupported EEH RTAS service#%d\n",
__func__, op);
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH RFC 00/22] EEH Support for VFIO PCI devices on PowerKVM guest

2014-05-04 Thread Gavin Shan
The series of patches intends to support EEH for PCI devices, which have been
passed through to PowerKVM based guest via VFIO. The implementation is
straightforward based on the issues or problems we have to resolve to support
EEH for PowerKVM based guest.

- Emulation for EEH RTAS requests. Thankfully, we already have infrastructure
  to emulate XICS. Without introducing a new mechanism, we just extend that
  existing infrastructure to support EEH RTAS emulation. EEH RTAS requests
  initiated from the guest are posted to the host, where the requests get
  handled or delivered to the underlying firmware for further handling. For
  that, the host kernel has to maintain the PCI address mapping (host
  domain/bus/slot/function to guest's PHB BUID/bus/slot/function) via the
  KVM VFIO device. The address mapping will be built when initializing the
  VFIO device in QEMU and destroyed when the VFIO device in QEMU goes
  offline, or when the VM is destroyed.

- The infrastructure for error injection is introduced. The emulation for the
  related RTAS services is similar to what we do for EEH/XICS RTAS requests.
  For now, we just support PCI error injection. We need extend it for injecting
  other types of errors in future.

The series of patches requires corresponding firmware changes from Mike Qiu to
support error injection, as well as QEMU changes to support EEH for the guest.
The QEMU patchset will be sent separately.

I usually use the command line (not virsh) to start PowerKVM based guests on a
Firebird-L machine with different types of PCI devices assigned (passed
through) to the guest. The following cases have been tested. The EEH error can
be injected successfully by the utility "errinjct" running on the guest, and we
can recover from the EEH error successfully.

Testing on P7
=

- Emulex adapter
- USB (OHCI) PCI adapter

Testing on P8
=

- MLX4 adapter (Partially)
- USB (xHCI) PCI adapter

-

arch/powerpc/include/asm/book3s_errinjct.h |  97 
arch/powerpc/include/asm/eeh.h |  78 
arch/powerpc/include/asm/kvm_ppc.h |   7 ++
arch/powerpc/include/asm/opal.h|  65 
arch/powerpc/kernel/eeh.c  |   8 ++
arch/powerpc/kernel/eeh_pe.c   | 297 
+
arch/powerpc/kvm/Kconfig   |  17 +
arch/powerpc/kvm/Makefile  |   6 ++
arch/powerpc/kvm/book3s_errinjct.c | 329 
+
arch/powerpc/kvm/book3s_hv.c   |   2 +
arch/powerpc/kvm/book3s_rtas.c |  67 +
arch/powerpc/platforms/powernv/Makefile|   2 +
arch/powerpc/platforms/powernv/eeh-ioda.c  |   3 +-
arch/powerpc/platforms/powernv/eeh-rtas.c  | 551 
+
arch/powerpc/platforms/powernv/errinjct.c  | 215 
+
arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
include/linux/kvm_host.h   |  21 ++
include/uapi/linux/kvm.h   |  10 +++
virt/kvm/vfio.c|  60 ++-
19 files changed, 1834 insertions(+), 2 deletions(-)
create mode 100644 arch/powerpc/include/asm/book3s_errinjct.h
create mode 100644 arch/powerpc/kvm/book3s_errinjct.c
create mode 100644 arch/powerpc/platforms/powernv/eeh-rtas.c
create mode 100644 arch/powerpc/platforms/powernv/errinjct.c

Thanks,
Gavin

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/22] powerpc/eeh: Emulate RTAS call ibm,set-eeh-option

2014-05-04 Thread Gavin Shan
The RTAS call "ibm,set-eeh-option" is being used to enable/disable
EEH functionality on the specified PE, or enable MMIO/DMA for the
frozen PE. The patch emulates the RTAS call.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/platforms/powernv/eeh-rtas.c | 83 +++
 1 file changed, 83 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-rtas.c 
b/arch/powerpc/platforms/powernv/eeh-rtas.c
index f04b820..1a037fd 100644
--- a/arch/powerpc/platforms/powernv/eeh-rtas.c
+++ b/arch/powerpc/platforms/powernv/eeh-rtas.c
@@ -91,6 +91,86 @@ static int kvmppc_eeh_format_addr(struct kvm_vcpu *vcpu,
return 0;
 }
 
+static int kvmppc_eeh_set_option(struct kvm_vcpu *vcpu,
+struct rtas_args *args)
+{
+   struct pci_controller *hose;
+   struct pnv_phb *phb;
+   struct eeh_dev *edev;
+   struct eeh_pe *pe;
+   struct eeh_vfio_pci_addr addr;
+   int opcode;
+   bool is_legacy = false;
+   int ret = 0;
+
+   /* Sanity check on parameter */
+   if (args->nargs != 4 || args->nret != 1) {
+   pr_warn("%s: Non-matched arguments (%d, %d) - (4, 1)\n",
+   __func__, args->nargs, args->nret);
+   ret = -3;
+   goto out;
+   }
+
+   /* Check on opcode */
+   opcode = args->args[3];
+   if (opcode < EEH_OPT_DISABLE || opcode > EEH_OPT_THAW_DMA) {
+   pr_warn("%s: opcode %d out of range (%d, %d)\n",
+   __func__, opcode, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
+   ret = -3;
+   goto out;
+   }
+
+   if (opcode == EEH_OPT_ENABLE)
+   is_legacy = true;
+
+   /* Figure out the address */
+   if (kvmppc_eeh_format_addr(vcpu, args, &addr, is_legacy, &edev, &pe)) {
+   ret = -7;
+   goto out;
+   }
+
+   /* Insure that the EEH stuff has been initialized */
+   hose = pe->phb;
+   phb = hose->private_data;
+   if (!(phb->flags & PNV_PHB_FLAG_EEH)) {
+   pr_warn("%s: EEH disabled on PHB#%d\n",
+   __func__, hose->global_number);
+   ret = -7;
+   goto out;
+   }
+
+   /*
+* The EEH functionality has been enabled on all PEs
+* by default. So just return success. The same situation
+* would be applied while we disable EEH functionality.
+* However, the guest isn't expected to disable that
+* at all.
+*/
+   if (opcode == EEH_OPT_DISABLE ||
+   opcode == EEH_OPT_ENABLE) {
+   ret = 0;
+   goto out;
+   }
+
+   /*
+* Call into the IODA dependent backend in order
+* to enable DMA or MMIO for the indicated PE.
+*/
+   if (phb->eeh_ops && phb->eeh_ops->set_option) {
+   if (phb->eeh_ops->set_option(pe, opcode)) {
+   pr_warn("%s: Failure from backend\n",
+   __func__);
+   ret = -1;
+   }
+   } else {
+   pr_warn("%s: Unsupported request\n",
+   __func__);
+   ret = -7;
+   }
+out:
+   return ret;
+}
+
 /**
  * kvmppc_eeh_rtas - Backend for EEH RTAS emulation
  * @vcpu: KVM virtual CPU
@@ -107,6 +187,9 @@ void kvmppc_eeh_rtas(struct kvm_vcpu *vcpu, struct 
rtas_args *args, int op)
 
/* Parse the requested service */
switch (op) {
+   case eeh_rtas_set_option:
+   ret = kvmppc_eeh_set_option(vcpu, args);
+   break;
default:
pr_warn("%s: Unsupported EEH RTAS service#%d\n",
__func__, op);
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 14/22] powerpc/eeh: Emulate RTAS call ibm,get-config-addr-info2

2014-05-04 Thread Gavin Shan
The RTAS call "ibm,get-config-addr-info2" is being used by guest
to retrieve the corresponding PE number for the specified PCI device.
The patch implements the backend to support the emulation of the
RTAS call.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/platforms/powernv/eeh-rtas.c | 59 +++
 1 file changed, 59 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-rtas.c 
b/arch/powerpc/platforms/powernv/eeh-rtas.c
index 031ee8c..4a9c2c7 100644
--- a/arch/powerpc/platforms/powernv/eeh-rtas.c
+++ b/arch/powerpc/platforms/powernv/eeh-rtas.c
@@ -334,6 +334,62 @@ out:
return ret;
 }
 
+static int kvmppc_eeh_get_addr2(struct kvm_vcpu *vcpu,
+   struct rtas_args *args)
+{
+   struct pci_controller *hose;
+   struct pnv_phb *phb;
+   struct eeh_dev *edev;
+   struct eeh_pe *pe;
+   struct eeh_vfio_pci_addr addr;
+   int opcode;
+   int ret = 0;
+
+   /* Sanity check on parameter */
+   if (args->nargs != 4 || args->nret != 2) {
+   pr_warn("%s: Non-matched arguments (%d, %d) - (4, 2)\n",
+   __func__, args->nargs, args->nret);
+   ret = -3;
+   goto out;
+   }
+
+   /* Check on the operation code */
+   opcode = args->args[3];
+   if (opcode != 0 && opcode != 1) {
+   pr_warn("%s: opcode %d out of range (0, 1)\n",
+   __func__, opcode);
+   ret = -3;
+   goto out;
+   }
+
+   /* Figure out address */
+   if (kvmppc_eeh_format_addr(vcpu, args, &addr, true, &edev, &pe)) {
+   ret = -3;
+   goto out;
+   }
+
+   /* Insure that the EEH stuff has been initialized */
+   hose = pe->phb;
+   phb = hose->private_data;
+   if (!(phb->flags & PNV_PHB_FLAG_EEH)) {
+   pr_warn("%s: EEH disabled on PHB#%d\n",
+   __func__, hose->global_number);
+   ret = -3;
+   goto out;
+   }
+
+   /*
+* Fill result according to opcode. We don't differentiate
+* PCI bus and device sensitive PE here.
+*/
+   if (opcode == 0)
+   args->rets[1] = pe->gaddr.pe_addr;
+   else
+   args->rets[1] = 1;
+out:
+   return ret;
+}
+
 /**
  * kvmppc_eeh_rtas - Backend for EEH RTAS emulation
  * @vcpu: KVM virtual CPU
@@ -359,6 +415,9 @@ void kvmppc_eeh_rtas(struct kvm_vcpu *vcpu, struct 
rtas_args *args, int op)
case eeh_rtas_read_slot_reset_state2:
ret = kvmppc_eeh_get_state2(vcpu, args);
break;
+   case eeh_rtas_get_config_addr_info2:
+   ret = kvmppc_eeh_get_addr2(vcpu, args);
+   break;
default:
pr_warn("%s: Unsupported EEH RTAS service#%d\n",
__func__, op);
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 18/22] powerpc/eeh: Avoid event on passed PE

2014-05-04 Thread Gavin Shan
If we detect frozen state on a PE that has been passed to a guest, we
needn't handle it. Instead, we rely on the guest to detect and recover
it. The patch avoids raising an EEH event on the frozen passed PE so that
the guest has a chance to handle it.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/kernel/eeh.c | 8 
 arch/powerpc/platforms/powernv/eeh-ioda.c | 3 ++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 33d683a..a2121e8 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -399,6 +399,14 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
if (ret > 0)
return ret;
 
+   /*
+* If the PE has been passed to guest, we won't check the
+* state. Instead, let the guest handle it if the PE has
+* been frozen.
+*/
+   if (eeh_pe_passed(pe))
+   return 0;
+
/* If we already have a pending isolation event for this
 * slot, we know it's bad already, we don't need to check.
 * Do this checking under a lock; as multiple PCI devices
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c 
b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 1b5982f..03a3ed2 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -890,7 +890,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
opal_pci_eeh_freeze_clear(phb->opal_id, 
frozen_pe_no,
OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
ret = EEH_NEXT_ERR_NONE;
-   } else if ((*pe)->state & EEH_PE_ISOLATED) {
+   } else if ((*pe)->state & EEH_PE_ISOLATED ||
+  eeh_pe_passed(*pe)) {
ret = EEH_NEXT_ERR_NONE;
} else {
pr_err("EEH: Frozen PHB#%x-PE#%x (%s) 
detected\n",
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/22] powerpc/eeh: Introduce kvmppc_eeh_format_addr()

2014-05-04 Thread Gavin Shan
The guest will pass 2 kinds of addresses: the traditional bus/device/
function combo, and the guest sensitive PE address returned from the host.
The patch introduces function kvmppc_eeh_format_addr() to convert
the guest address information from the RTAS call argument (struct rtas_args)
and retrieve the EEH device or PE instance if necessary. The function
will be used by subsequent patches.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/platforms/powernv/eeh-rtas.c | 52 +++
 1 file changed, 52 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/eeh-rtas.c 
b/arch/powerpc/platforms/powernv/eeh-rtas.c
index fded461..f04b820 100644
--- a/arch/powerpc/platforms/powernv/eeh-rtas.c
+++ b/arch/powerpc/platforms/powernv/eeh-rtas.c
@@ -39,6 +39,58 @@
 #include "powernv.h"
 #include "pci.h"
 
+/*
+ * Guest is passing 2 types of addresses. First one would be
+ * traditional bus/device/function combo and another one is
+ * PE address, which starts from 0x1
+ */
+static int kvmppc_eeh_format_addr(struct kvm_vcpu *vcpu,
+ struct rtas_args *args,
+ struct eeh_vfio_pci_addr *addr,
+ bool is_legacy,
+ struct eeh_dev **pedev,
+ struct eeh_pe **ppe)
+{
+   struct eeh_dev *edev;
+   struct eeh_pe *pe;
+
+   if (pedev) *pedev = NULL;
+   if (ppe) *ppe = NULL;
+
+   addr->kvm   = vcpu->kvm;
+   addr->buid_hi   = args->args[1];
+   addr->buid_lo   = args->args[2];
+   if (is_legacy) {
+   addr->bus   = (args->args[0] >> 16) & 0xFF;
+   addr->devfn = (args->args[0] >> 8) & 0xFF;
+
+   edev = eeh_vfio_dev_get(addr);
+   if (!edev) {
+   pr_warn("%s: Can't find VFIO device "
+   "(%08x-%08x-%02x-%02x)\n",
+   __func__, addr->buid_hi,
+   addr->buid_lo, addr->bus, addr->devfn);
+   return -EEXIST;
+   }
+
+   if (pedev) *pedev = edev;
+   if (ppe)   *ppe = edev->pe;
+   } else {
+   addr->pe_addr = args->args[0];
+   pe = eeh_vfio_pe_get(addr);
+   if (!pe) {
+   pr_warn("%s: Can't find PE (%08x-%08x-%x)\n",
+   __func__, addr->buid_hi,
+   addr->buid_lo, addr->pe_addr);
+   return -EEXIST;
+   }
+
+   if (ppe) *ppe = pe;
+   }
+
+   return 0;
+}
+
 /**
  * kvmppc_eeh_rtas - Backend for EEH RTAS emulation
  * @vcpu: KVM virtual CPU
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/22] powerpc/eeh: Function to tear down address mapping

2014-05-04 Thread Gavin Shan
The patch introduces function kvm_vfio_eeh_dev_unmap(), which is
expected to be called on IOCTL command issued to the VM device, in
order to tear down the address mapping for VFIO PCI device.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/kernel/eeh_pe.c | 82 
 include/linux/kvm_host.h |  7 
 2 files changed, 89 insertions(+)

diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 200cd5a..8398efc 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -420,6 +420,88 @@ int kvm_vfio_eeh_dev_map(struct kvm *kvm, int domain,
 }
 EXPORT_SYMBOL_GPL(kvm_vfio_eeh_dev_map);
 
+ /**
+  * kvm_vfio_eeh_dev_unmap - Tear down address mapping for VFIO PCI device
+  *
+  * @kvm: VM descriptor
+  * @domain: host domain
+  * @bdn: host bus/device/function number
+  *
+  * Tear down address mapping for VFIO PCI device.
+  */
+int kvm_vfio_eeh_dev_unmap(struct kvm *kvm, int domain, int bdn)
+{
+   struct pci_bus *bus;
+   struct pci_dev *dev;
+   struct eeh_pe *pe;
+   struct eeh_dev *edev, *tmp;
+   int bus_no, devfn;
+   bool passed;
+
+   /* Find the PCI device in host side */
+   bus_no = (bdn >> 8) & 0xff;
+   devfn = bdn & 0xff;
+   bus = pci_find_bus(domain, bus_no);
+   if (!bus) {
+   pr_warn("%s: PCI bus %04x:%02x not found\n",
+   __func__, domain, bus_no);
+   return -ENODEV;
+   }
+
+   dev = pci_get_slot(bus, devfn);
+   if (!dev) {
+   pr_warn("%s: PCI device %04x:%02x:%02x.%01x not found\n",
+   __func__, domain, bus_no,
+   PCI_SLOT(devfn), PCI_FUNC(devfn));
+   return -ENODEV;
+   }
+
+   /* Mark the EEH device as non-passed */
+   edev = pci_dev_to_eeh_dev(dev);
+   if (!edev) {
+   pr_warn("%s: No EEH dev for PCI device %s\n",
+   __func__, pci_name(dev));
+   return -ENODEV;
+   } else if (!eeh_dev_passed(edev)||
+  !eeh_pe_passed(edev->pe) ||
+  edev->gaddr.kvm != kvm   ||
+  edev->pe->gaddr.kvm != kvm) {
+   pr_warn("%s: Non-passsed PCI dev %s or PE\n",
+   __func__, pci_name(dev));
+   return 0;
+   }
+   memset(&edev->gaddr, 0, sizeof(edev->gaddr));
+   eeh_dev_set_passed(edev, false);
+   pr_debug("EEH: Host PCI device %s returned\n",
+   pci_name(dev));
+
+   /*
+* Mark the PE as non-passed if all PCI devices
+* except P2P bridges are non-passed.
+*/
+   pe = edev->pe;
+   passed = false;
+   eeh_pe_for_each_dev(pe, edev, tmp) {
+   dev = eeh_dev_to_pci_dev(edev);
+   if (dev && dev->subordinate)
+   continue;
+   if (eeh_dev_passed(edev)) {
+   passed = true;
+   break;
+   }
+   }
+
+   if (!passed) {
+   memset(&pe->gaddr, 0, sizeof(pe->gaddr));
+   eeh_pe_set_passed(pe, false);
+   pr_debug("EEH: PHB#%x-PE#%x returned to host\n",
+   pe->phb->global_number, pe->addr);
+   }
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_vfio_eeh_dev_unmap);
+
 static void *__kvmppc_eeh_vfio_release(void *data, void *flag)
 {
struct eeh_pe *pe = (struct eeh_pe *)data;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 294ce48..520b3d0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1106,14 +1106,21 @@ static inline void kvm_vcpu_set_dy_eligible(struct 
kvm_vcpu *vcpu, bool val)
 #ifdef CONFIG_KVM_EEH
 typedef int (*kvm_vfio_dev_eeh_map)(struct kvm *kvm, int domain,
int bdn, unsigned long buid, int gbdn);
+typedef int (*kvm_vfio_dev_eeh_unmap)(struct kvm *kvm, int domain, int bdn);
 extern int kvm_vfio_eeh_dev_map(struct kvm *kvm, int domain,
int bdn, unsigned long buid, int gbdn);
+extern int kvm_vfio_eeh_dev_unmap(struct kvm *kvm, int domain, int bdn);
 #else
 static inline int kvm_vfio_eeh_dev_map(struct kvm *kvm, int domain,
   int bdn, unsigned long buid, int gbdn)
 {
return 0;
 }
+
+static inline int kvm_vfio_eeh_dev_unmap(struct kvm *kvm, int domain, int bdn)
+{
+   return 0;
+}
 #endif /* CONFIG_KVM_EEH */
 
 #endif
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/22] powerpc/eeh: Function for address mapping

2014-05-04 Thread Gavin Shan
The patch introduces function kvm_vfio_eeh_dev_map(), which is
expected to be called on IOCTL command issued to the VM device, in
order to build the address mapping for VFIO PCI device.

Signed-off-by: Gavin Shan 
---
 arch/powerpc/kernel/eeh_pe.c | 88 
 include/linux/kvm_host.h | 14 +++
 2 files changed, 102 insertions(+)

diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 9e73188..200cd5a 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -332,6 +332,94 @@ struct eeh_dev *eeh_vfio_dev_get(struct eeh_vfio_pci_addr 
*addr)
return NULL;
 }
 
+/**
+ * kvm_vfio_eeh_dev_map - Build the address mapping for VFIO device
+ *
+ * @kvm: VM descriptor
+ * @domain: host domain of PCI device
+ * @bdn: host bus/device/function number
+ * @buid: BUID of guest PHB
+ * @gbdn: guest bus/device/function number
+ *
+ * Build the address mapping between host and guest device. It's called
+ * while passing through PCI device from host to guest.
+ */
+int kvm_vfio_eeh_dev_map(struct kvm *kvm, int domain,
+int bdn, unsigned long buid, int gbdn)
+{
+   struct pci_bus *bus, *pe_bus;
+   struct pci_dev *dev;
+   struct eeh_dev *edev;
+   struct eeh_pe *pe;
+   int bus_no, devfn;
+
+   /* Find the PCI device in host side */
+   bus_no = (bdn >> 8) & 0xff;
+   devfn = bdn & 0xff;
+   bus = pci_find_bus(domain, bus_no);
+   if (!bus) {
+   pr_warn("%s: PCI bus %04x:%02x not found\n",
+   __func__, domain, bus_no);
+   return -ENODEV;
+   }
+
+   dev = pci_get_slot(bus, devfn);
+   if (!dev) {
+   pr_warn("%s: PCI device %04x:%02x:%02x.%01x not found\n",
+   __func__, domain, bus_no,
+   PCI_SLOT(devfn), PCI_FUNC(devfn));
+   return -ENODEV;
+   }
+
+   /*
+* Mark the EEH device as passed. We allow dynamic change
+* on the address mapping.
+*/
+   edev = pci_dev_to_eeh_dev(dev);
+   if (!edev) {
+   pr_warn("%s: No EEH dev for PCI device %s\n",
+   __func__, pci_name(dev));
+   return -ENODEV;
+   }
+
+   /*
+* The PE configuration address is exactly PCI config address
+* of the PE primary bus. That has format 00BBSS00 defined in
+* PAPR.
+*/
+   pe = edev->pe;
+   if (!eeh_pe_passed(pe)) {
+   pe_bus = eeh_pe_bus_get(pe);
+   BUG_ON(!pe_bus);
+
+   pe->gaddr.kvm   = kvm;
+   pe->gaddr.buid_hi   = BUID_HI(buid);
+   pe->gaddr.buid_lo   = BUID_LO(buid);
+   pe->gaddr.pe_addr   = pe_bus->number << 16;
+   eeh_pe_set_passed(pe, true);
+   } else if (pe->gaddr.kvm != kvm ||
+  pe->gaddr.buid_hi != BUID_HI(buid) ||
+  pe->gaddr.buid_lo != BUID_LO(buid)) {
+   pr_warn("%s: Mismatched VM or PHB on passing %s\n",
+   __func__, pci_name(dev));
+   return -EINVAL;
+   }
+
+   edev->gaddr.kvm = kvm;
+   edev->gaddr.buid_hi = BUID_HI(buid);
+   edev->gaddr.buid_lo = BUID_LO(buid);
+   edev->gaddr.bus = (gbdn >> 8) & 0xff;
+   edev->gaddr.devfn   = gbdn & 0xff;
+   eeh_dev_set_passed(edev, true);
+
+   pr_debug("EEH: Host PCI device %s passed to %lx-%02x:%02x.%01x\n",
+pci_name(dev), buid, (gbdn >> 8) & 0xff,
+PCI_SLOT(gbdn & 0xff), PCI_FUNC(gbdn & 0xff));
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_vfio_eeh_dev_map);
+
 static void *__kvmppc_eeh_vfio_release(void *data, void *flag)
 {
struct eeh_pe *pe = (struct eeh_pe *)data;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7d21cf9..294ce48 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1102,5 +1102,19 @@ static inline void kvm_vcpu_set_dy_eligible(struct 
kvm_vcpu *vcpu, bool val)
 {
 }
 #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
+
+#ifdef CONFIG_KVM_EEH
+typedef int (*kvm_vfio_dev_eeh_map)(struct kvm *kvm, int domain,
+   int bdn, unsigned long buid, int gbdn);
+extern int kvm_vfio_eeh_dev_map(struct kvm *kvm, int domain,
+   int bdn, unsigned long buid, int gbdn);
+#else
+static inline int kvm_vfio_eeh_dev_map(struct kvm *kvm, int domain,
+  int bdn, unsigned long buid, int gbdn)
+{
+   return 0;
+}
+#endif /* CONFIG_KVM_EEH */
+
 #endif
 
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH RFC 00/22] EEH Support for VFIO PCI devices on PowerKVM guest

2014-05-05 Thread Gavin Shan
On Mon, May 05, 2014 at 08:00:12AM -0600, Alex Williamson wrote:
>On Mon, 2014-05-05 at 13:56 +0200, Alexander Graf wrote:
>> On 05/05/2014 03:27 AM, Gavin Shan wrote:
>> > The series of patches intends to support EEH for PCI devices, which have 
>> > been
>> > passed through to PowerKVM based guest via VFIO. The implementation is
>> > straightforward based on the issues or problems we have to resolve to 
>> > support
>> > EEH for PowerKVM based guest.
>> >
>> > - Emulation for EEH RTAS requests. Thanksfully, we already have 
>> > infrastructure
>> >to emulate XICS. Without introducing new mechanism, we just extend that
>> >existing infrastructure to support EEH RTAS emulation. EEH RTAS requests
>> >initiated from guest are posted to host where the requests get handled 
>> > or
>> >delivered to underly firmware for further handling. For that, the host 
>> > kerenl
>> >has to maintain the PCI address (host domain/bus/slot/function to 
>> > guest's
>> >PHB BUID/bus/slot/function) mapping via KVM VFIO device. The address 
>> > mapping
>> >will be built when initializing VFIO device in QEMU and destroied when 
>> > the
>> >VFIO device in QEMU is going to offline, or VM is destroy.
>> 
>> Do you also expose all those interfaces to user space? VFIO is as much 
>> about user space device drivers as it is about device assignment.
>> 

Yep, all the interfaces are exported to user space. 

>> I would like to first see an implementation that doesn't touch KVM 
>> emulation code at all but instead routes everything through QEMU. As a 
>> second step we can then accelerate performance critical paths inside of KVM.
>> 

Ok. I'll change the implementation. However, QEMU still has to
poll/push information from/to the host kernel. So the best place for that
would be tce_iommu_driver_ops::ioctl as EEH is a Power specific feature.

For the error injection, I guess I have to put the logic token management
into QEMU and error injection request will be handled by QEMU and then
routed to host kernel via additional syscall as we did for pSeries.

>> That way we ensure that user space device drivers have all the power 
>> over a device they need to drive it.
>
>+1
>

Thanks,
Gavin

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] drivers/vfio/pci: Fix wrong MSI interrupt count

2014-05-12 Thread Gavin Shan
According to the PCI local bus specification, the Message Control
register for MSI (offset: 2, length: 2) uses bit#0 to enable or
disable MSI logic, so that bit shouldn't contribute to the
calculation of the MSI interrupt count. The patch fixes the issue.

Signed-off-by: Gavin Shan 
---
 drivers/vfio/pci/vfio_pci.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 7ba0424..6b8cd07 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -196,8 +196,7 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device 
*vdev, int irq_type)
if (pos) {
pci_read_config_word(vdev->pdev,
 pos + PCI_MSI_FLAGS, &flags);
-
-   return 1 << (flags & PCI_MSI_FLAGS_QMASK);
+   return 1 << ((flags & PCI_MSI_FLAGS_QMASK) >> 1);
}
} else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX) {
u8 pos;
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4] drivers/vfio: Rework offsetofend()

2014-05-12 Thread Gavin Shan
The macro offsetofend() introduces an unnecessary temporary variable
"tmp". The patch avoids that and saves a bit of stack memory.

Signed-off-by: Gavin Shan 
---
 include/linux/vfio.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 81022a52..8ec980b 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -86,9 +86,8 @@ extern void vfio_unregister_iommu_driver(
  * from user space.  This allows us to easily determine if the provided
  * structure is sized to include various fields.
  */
-#define offsetofend(TYPE, MEMBER) ({   \
-   TYPE tmp;   \
-   offsetof(TYPE, MEMBER) + sizeof(tmp.MEMBER); }) \
+#define offsetofend(TYPE, MEMBER) \
+   (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
 
 /*
  * External user API
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


  1   2   >