RE: [PATCH v9 01/13] KVM: PPC: POWERNV: move iommu_add_device earlier

2013-10-29 Thread Bhushan Bharat-R65777
Hi Alex,

It looks like this patch has not been picked up by anyone. Are you going to
pick it up? My vfio/iommu patches depend on this patch (I have already tested
it).

Thanks
-Bharat

> -Original Message-
> From: Linuxppc-dev [mailto:linuxppc-dev-
> bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Alexey
> Kardashevskiy
> Sent: Wednesday, August 28, 2013 2:08 PM
> To: linuxppc-...@lists.ozlabs.org
> Cc: k...@vger.kernel.org; Gleb Natapov; Alexey Kardashevskiy; Alexander Graf;
> kvm-...@vger.kernel.org; linux-kernel@vger.kernel.org; linux...@kvack.org; 
> Paul
> Mackerras; Paolo Bonzini; David Gibson
> Subject: [PATCH v9 01/13] KVM: PPC: POWERNV: move iommu_add_device earlier
> 
> The current implementation of IOMMU on sPAPR does not use iommu_ops and
> therefore does not call IOMMU API's bus_set_iommu() which
> 1) sets iommu_ops for a bus
> 2) registers a bus notifier
> Instead, PCI devices are added to IOMMU groups from
> subsys_initcall_sync(tce_iommu_init) which does basically the same thing 
> without
> using iommu_ops callbacks.
> 
> However Freescale PAMU driver (https://lkml.org/lkml/2013/7/1/158)
> implements iommu_ops and when tce_iommu_init is called, every PCI device is
> already added to some group so there is a conflict.
> 
> This patch does 2 things:
> 1. removes the loop in which PCI devices were added to groups and adds 
> explicit
> iommu_add_device() calls to add devices as soon as they get the iommu_table
> pointer assigned to them.
> 2. moves a bus notifier to powernv code in order to avoid conflict with the
> notifier from Freescale driver.
> 
> iommu_add_device() and iommu_del_device() are public now.
> 
> Signed-off-by: Alexey Kardashevskiy 
> ---
> Changes:
> v8:
> * added the check for iommu_group!=NULL before removing device from a group as
> suggested by Wei Yang 
> 
> v2:
> * added a helper - set_iommu_table_base_and_group - which does
> set_iommu_table_base() and iommu_add_device()
> ---
>  arch/powerpc/include/asm/iommu.h|  9 +++
>  arch/powerpc/kernel/iommu.c | 41 
> +++--
>  arch/powerpc/platforms/powernv/pci-ioda.c   |  8 +++---
>  arch/powerpc/platforms/powernv/pci-p5ioc2.c |  2 +-
>  arch/powerpc/platforms/powernv/pci.c| 33 ++-
>  arch/powerpc/platforms/pseries/iommu.c  |  8 +++---
>  6 files changed, 55 insertions(+), 46 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/iommu.h 
> b/arch/powerpc/include/asm/iommu.h
> index c34656a..19ad77f 100644
> --- a/arch/powerpc/include/asm/iommu.h
> +++ b/arch/powerpc/include/asm/iommu.h
> @@ -103,6 +103,15 @@ extern struct iommu_table *iommu_init_table(struct
> iommu_table * tbl,
>   int nid);
>  extern void iommu_register_group(struct iommu_table *tbl,
>int pci_domain_number, unsigned long pe_num);
> +extern int iommu_add_device(struct device *dev);
> +extern void iommu_del_device(struct device *dev);
> +
> +static inline void set_iommu_table_base_and_group(struct device *dev,
> +   void *base)
> +{
> + set_iommu_table_base(dev, base);
> + iommu_add_device(dev);
> +}
> 
>  extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
>   struct scatterlist *sglist, int nelems,
> diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
> index b20ff17..15f8ca8 100644
> --- a/arch/powerpc/kernel/iommu.c
> +++ b/arch/powerpc/kernel/iommu.c
> @@ -1105,7 +1105,7 @@ void iommu_release_ownership(struct iommu_table *tbl)
>  }
>  EXPORT_SYMBOL_GPL(iommu_release_ownership);
> 
> -static int iommu_add_device(struct device *dev)
> +int iommu_add_device(struct device *dev)
>  {
>   struct iommu_table *tbl;
>   int ret = 0;
> @@ -1134,46 +1134,13 @@ static int iommu_add_device(struct device *dev)
> 
>   return ret;
>  }
> +EXPORT_SYMBOL_GPL(iommu_add_device);
> 
> -static void iommu_del_device(struct device *dev)
> +void iommu_del_device(struct device *dev)
>  {
>   iommu_group_remove_device(dev);
>  }
> -
> -static int iommu_bus_notifier(struct notifier_block *nb,
> -   unsigned long action, void *data)
> -{
> - struct device *dev = data;
> -
> - switch (action) {
> - case BUS_NOTIFY_ADD_DEVICE:
> - return iommu_add_device(dev);
> - case BUS_NOTIFY_DEL_DEVICE:
> - iommu_del_device(dev);
> - return 0;
> - default:
> - return 0;
> - }
> -}
> -
> -static struct notifier_block tce_iommu_bus_nb = {
> - .notifier_call = iommu_bus_notifier,
> -};
> -
> -static int __init tce_iommu_init(void)
> -{
> - struct pci_dev *pdev = NULL;
> -
> - BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE);
> -
> - for_each_pci_dev(pdev)
> - iommu_add_device(&pdev->dev);
> -
> - bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
> - return 0;
> -}
> -
> 

RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-29 Thread Bhushan Bharat-R65777


> -Original Message-
> From: Wood Scott-B07421
> Sent: Tuesday, October 29, 2013 10:25 AM
> To: Bhushan Bharat-R65777
> Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder Stuart-B08248;
> christoffer.d...@linaro.org; linux-kernel@vger.kernel.org;
> a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
> peter.mayd...@linaro.org; santosh.shu...@linaro.org; k...@vger.kernel.org;
> gre...@linuxfoundation.org
> Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
> sysfs only
> 
> On Mon, 2013-10-28 at 23:45 -0500, Bhushan Bharat-R65777 wrote:
> >
> > > -Original Message-
> > > From: Wood Scott-B07421
> > > Sent: Tuesday, October 29, 2013 10:05 AM
> > > To: Bhushan Bharat-R65777
> > > Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder
> > > Stuart-B08248; christoffer.d...@linaro.org;
> > > linux-kernel@vger.kernel.org; a.mota...@virtualopensystems.com;
> > > ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
> > > santosh.shu...@linaro.org; k...@vger.kernel.org;
> > > gre...@linuxfoundation.org
> > > Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
> > > binding via sysfs only
> > >
> > > On Mon, 2013-10-28 at 23:31 -0500, Bhushan Bharat-R65777 wrote:
> > > >
> > > > > -Original Message-
> > > > > From: Wood Scott-B07421
> > > > > Sent: Tuesday, October 29, 2013 10:00 AM
> > > > > To: Bhushan Bharat-R65777
> > > > > Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder
> > > > > Stuart-B08248; christoffer.d...@linaro.org;
> > > > > linux-kernel@vger.kernel.org; a.mota...@virtualopensystems.com;
> > > > > ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
> > > > > santosh.shu...@linaro.org; k...@vger.kernel.org;
> > > > > gre...@linuxfoundation.org
> > > > > Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
> > > > > binding via sysfs only
> > > > >
> > > > > On Mon, 2013-10-28 at 22:52 -0500, Bhushan Bharat-R65777 wrote:
> > > > > > > So when ids == NULL it does not check for vendor etc. and calls
> > > > > > > pci_add_dynid(), which in turn calls driver_attach().
> > > > > > >
> > > > > > > If we change the above loop to break if ids->vendor ==
> > > > > > > PCI_ANY_ID && ids->subvendor == PCI_ANY_ID then we will also call
> > > > > > > pci_add_dynid().
> > > > >
> > > > > What problem are you trying to solve?
> > > >
> > > > new_id interface to continue working as before.
> > >
> > > In what specific way does this allow new_id to continue working as
> > > before?  Be verbose.
> >
> >
> > > What I observed is that with this patch (Kim's patch) the new_id
> > > interface stops working.
> 
> Yes.
> 
> >  This was found to be because store_new_id() checks pdrv->id_table,
> > which is no longer NULL, so the check below fails
> 
> I do not think that is the reason.  The reason is because sysfs_bind_only is
> set, and this is not a direct sysfs bind.
> 
> > if (ids) {
> > ^^
> > This is no more NULL, so enter inside the loop
> >
> > retval = -EINVAL;
> > while (ids->vendor || ids->subvendor || ids->class_mask) {
> > if (driver_data == ids->driver_data) {
> > retval = 0;
> > break;
> > }
> > ids++;
> > }
> > if (retval)   /* No match */
> > return retval; ^ This is where it returns
> > as -EINVAL
> 
> Why wouldn't it have broken out of the loop earlier, since driver_data and
> ids->driver_data should both be zero?  I assume this is with a patch to do
> PCI_ANY_ID in vfio-pci.

Hmm, I am pretty sure I have seen that issue a few times (the command-line
output is below), but now I am not getting any error reported. The device is
still not binding to the driver, because of sysfs_bind_only as you mentioned
(I thought of this as a second issue). If I am able to reproduce the first
issue I will let you know; otherwise there was no first issue :(

root@p5040ds:/sys/bus/pci# echo :01:00.0 > devices/\:01\:00.0/driver/unbind
e1000e :01:00.0 eth0: removed PHC
root@p5040ds:/sys/bus/pci# echo 8086 10d3 > drivers/vfio-pci/new_id
-sh: echo: write error: Invalid argument
root@p5040ds:/sys/bus/pci# echo :01:00.0 > drivers/vfio-pci/bind

-Bharat

> 
> -Scott
> 



RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-29 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Tuesday, October 29, 2013 10:25 AM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder Stuart-B08248;
 christoffer.d...@linaro.org; linux-kernel@vger.kernel.org;
 a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
 peter.mayd...@linaro.org; santosh.shu...@linaro.org; k...@vger.kernel.org;
 gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
 sysfs only
 
 On Mon, 2013-10-28 at 23:45 -0500, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: Wood Scott-B07421
   Sent: Tuesday, October 29, 2013 10:05 AM
   To: Bhushan Bharat-R65777
   Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder
   Stuart-B08248; christoffer.d...@linaro.org;
   linux-kernel@vger.kernel.org; a.mota...@virtualopensystems.com;
   ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
   santosh.shu...@linaro.org; k...@vger.kernel.org;
   gre...@linuxfoundation.org
   Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
   binding via sysfs only
  
   On Mon, 2013-10-28 at 23:31 -0500, Bhushan Bharat-R65777 wrote:
   
 -Original Message-
 From: Wood Scott-B07421
 Sent: Tuesday, October 29, 2013 10:00 AM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder
 Stuart-B08248; christoffer.d...@linaro.org;
 linux-kernel@vger.kernel.org; a.mota...@virtualopensystems.com;
 ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
 santosh.shu...@linaro.org; k...@vger.kernel.org;
 gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
 binding via sysfs only

 On Mon, 2013-10-28 at 22:52 -0500, Bhushan Bharat-R65777 wrote:
  So when ids == NULL it does not check of vendor etc and calls
  pci_add_dynid()
 which in turn calls driver_attach().
 
  If we change the above loop to break if ids->vendor ==
 PCI_ANY_ID && ids->subvendor == PCI_ANY_ID then we will also call
 pci_add_dynid().

 What problem are you trying to solve?
   
new_id interface to continue working as before.
  
   In what specific way does this allow new_id to continue working as
   before?  Be verbose.
 
 
  What I observed that this patch (kim's patch) new_id interface stops 
  working.
 
 Yes.
 
   This was found to be because store_new_id() checks pdrv->id_table,
  which is no longer NULL, so the check below fails
 
 I do not think that is the reason.  The reason is because sysfs_bind_only is
 set, and this is not a direct sysfs bind.
 
  if (ids) {
  ^^
  This is no more NULL, so enter inside the loop
 
  retval = -EINVAL;
  while (ids->vendor || ids->subvendor || ids->class_mask) {
  if (driver_data == ids->driver_data) {
  retval = 0;
  break;
  }
  ids++;
  }
  if (retval)   /* No match */
  return retval; ^ This is where it returns
  as -EINVAL
 
 Why wouldn't it have broken out of the loop earlier, since driver_data and
 ids->driver_data should both be zero?  I assume this is with a patch to do
 PCI_ANY_ID in vfio-pci.

Hmm, I am pretty sure I have seen that issue a few times (the command-line
output is below), but now I am not getting any error reported. The device is
still not binding to the driver, because of sysfs_bind_only as you mentioned
(I thought of this as a second issue). If I am able to reproduce the first
issue I will let you know; otherwise there was no first issue :(

root@p5040ds:/sys/bus/pci# echo :01:00.0 > devices/\:01\:00.0/driver/unbind
e1000e :01:00.0 eth0: removed PHC
root@p5040ds:/sys/bus/pci# echo 8086 10d3 > drivers/vfio-pci/new_id
-sh: echo: write error: Invalid argument
root@p5040ds:/sys/bus/pci# echo :01:00.0 > drivers/vfio-pci/bind

-Bharat

 
 -Scott
 



RE: [PATCH v9 01/13] KVM: PPC: POWERNV: move iommu_add_device earlier

2013-10-29 Thread Bhushan Bharat-R65777
Hi Alex,

It looks like this patch has not been picked up by anyone. Are you going to
pick it up? My vfio/iommu patches depend on this patch (I have already tested
it).

Thanks
-Bharat

 -Original Message-
 From: Linuxppc-dev [mailto:linuxppc-dev-
 bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Alexey
 Kardashevskiy
 Sent: Wednesday, August 28, 2013 2:08 PM
 To: linuxppc-...@lists.ozlabs.org
 Cc: k...@vger.kernel.org; Gleb Natapov; Alexey Kardashevskiy; Alexander Graf;
 kvm-...@vger.kernel.org; linux-kernel@vger.kernel.org; linux...@kvack.org; 
 Paul
 Mackerras; Paolo Bonzini; David Gibson
 Subject: [PATCH v9 01/13] KVM: PPC: POWERNV: move iommu_add_device earlier
 
 The current implementation of IOMMU on sPAPR does not use iommu_ops and
 therefore does not call IOMMU API's bus_set_iommu() which
 1) sets iommu_ops for a bus
 2) registers a bus notifier
 Instead, PCI devices are added to IOMMU groups from
 subsys_initcall_sync(tce_iommu_init) which does basically the same thing 
 without
 using iommu_ops callbacks.
 
 However Freescale PAMU driver (https://lkml.org/lkml/2013/7/1/158)
 implements iommu_ops and when tce_iommu_init is called, every PCI device is
 already added to some group so there is a conflict.
 
 This patch does 2 things:
 1. removes the loop in which PCI devices were added to groups and adds 
 explicit
 iommu_add_device() calls to add devices as soon as they get the iommu_table
 pointer assigned to them.
 2. moves a bus notifier to powernv code in order to avoid conflict with the
 notifier from Freescale driver.
 
 iommu_add_device() and iommu_del_device() are public now.
 
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
 Changes:
 v8:
 * added the check for iommu_group!=NULL before removing device from a group as
 suggested by Wei Yang weiy...@linux.vnet.ibm.com
 
 v2:
 * added a helper - set_iommu_table_base_and_group - which does
 set_iommu_table_base() and iommu_add_device()
 ---
  arch/powerpc/include/asm/iommu.h|  9 +++
  arch/powerpc/kernel/iommu.c | 41 
 +++--
  arch/powerpc/platforms/powernv/pci-ioda.c   |  8 +++---
  arch/powerpc/platforms/powernv/pci-p5ioc2.c |  2 +-
  arch/powerpc/platforms/powernv/pci.c| 33 ++-
  arch/powerpc/platforms/pseries/iommu.c  |  8 +++---
  6 files changed, 55 insertions(+), 46 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/iommu.h 
 b/arch/powerpc/include/asm/iommu.h
 index c34656a..19ad77f 100644
 --- a/arch/powerpc/include/asm/iommu.h
 +++ b/arch/powerpc/include/asm/iommu.h
 @@ -103,6 +103,15 @@ extern struct iommu_table *iommu_init_table(struct
 iommu_table * tbl,
   int nid);
  extern void iommu_register_group(struct iommu_table *tbl,
int pci_domain_number, unsigned long pe_num);
 +extern int iommu_add_device(struct device *dev);
 +extern void iommu_del_device(struct device *dev);
 +
 +static inline void set_iommu_table_base_and_group(struct device *dev,
 +   void *base)
 +{
 + set_iommu_table_base(dev, base);
 + iommu_add_device(dev);
 +}
 
  extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
   struct scatterlist *sglist, int nelems, diff --git
 a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index
 b20ff17..15f8ca8 100644
 --- a/arch/powerpc/kernel/iommu.c
 +++ b/arch/powerpc/kernel/iommu.c
 @@ -1105,7 +1105,7 @@ void iommu_release_ownership(struct iommu_table *tbl)  }
 EXPORT_SYMBOL_GPL(iommu_release_ownership);
 
 -static int iommu_add_device(struct device *dev)
 +int iommu_add_device(struct device *dev)
  {
   struct iommu_table *tbl;
   int ret = 0;
 @@ -1134,46 +1134,13 @@ static int iommu_add_device(struct device *dev)
 
   return ret;
  }
 +EXPORT_SYMBOL_GPL(iommu_add_device);
 
 -static void iommu_del_device(struct device *dev)
 +void iommu_del_device(struct device *dev)
  {
   iommu_group_remove_device(dev);
  }
 -
 -static int iommu_bus_notifier(struct notifier_block *nb,
 -   unsigned long action, void *data)
 -{
 - struct device *dev = data;
 -
 - switch (action) {
 - case BUS_NOTIFY_ADD_DEVICE:
 - return iommu_add_device(dev);
 - case BUS_NOTIFY_DEL_DEVICE:
 - iommu_del_device(dev);
 - return 0;
 - default:
 - return 0;
 - }
 -}
 -
 -static struct notifier_block tce_iommu_bus_nb = {
 - .notifier_call = iommu_bus_notifier,
 -};
 -
 -static int __init tce_iommu_init(void)
 -{
 - struct pci_dev *pdev = NULL;
 -
 - BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE);
 -
 - for_each_pci_dev(pdev)
 - iommu_add_device(&pdev->dev);
 -
 - bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
 - return 0;
 -}
 -
 -subsys_initcall_sync(tce_iommu_init);
 +EXPORT_SYMBOL_GPL(iommu_del_device);
 
  

RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-29 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Tuesday, October 29, 2013 10:25 AM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder Stuart-B08248;
 christoffer.d...@linaro.org; linux-ker...@vger.kernel.org;
 a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
 peter.mayd...@linaro.org; santosh.shu...@linaro.org; kvm@vger.kernel.org;
 gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
 sysfs only
 
 On Mon, 2013-10-28 at 23:45 -0500, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: Wood Scott-B07421
   Sent: Tuesday, October 29, 2013 10:05 AM
   To: Bhushan Bharat-R65777
   Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder
   Stuart-B08248; christoffer.d...@linaro.org;
   linux-ker...@vger.kernel.org; a.mota...@virtualopensystems.com;
   ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
   santosh.shu...@linaro.org; kvm@vger.kernel.org;
   gre...@linuxfoundation.org
   Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
   binding via sysfs only
  
   On Mon, 2013-10-28 at 23:31 -0500, Bhushan Bharat-R65777 wrote:
   
 -Original Message-
 From: Wood Scott-B07421
 Sent: Tuesday, October 29, 2013 10:00 AM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder
 Stuart-B08248; christoffer.d...@linaro.org;
 linux-ker...@vger.kernel.org; a.mota...@virtualopensystems.com;
 ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
 santosh.shu...@linaro.org; kvm@vger.kernel.org;
 gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
 binding via sysfs only

 On Mon, 2013-10-28 at 22:52 -0500, Bhushan Bharat-R65777 wrote:
  So when ids == NULL it does not check of vendor etc and calls
  pci_add_dynid()
 which in turn calls driver_attach().
 
  If we change the above loop to break if ids->vendor ==
 PCI_ANY_ID && ids->subvendor == PCI_ANY_ID then we will also call
 pci_add_dynid().

 What problem are you trying to solve?
   
new_id interface to continue working as before.
  
   In what specific way does this allow new_id to continue working as
   before?  Be verbose.
 
 
  What I observed that this patch (kim's patch) new_id interface stops 
  working.
 
 Yes.
 
   This was found to be because store_new_id() checks pdrv->id_table,
  which is no longer NULL, so the check below fails
 
 I do not think that is the reason.  The reason is because sysfs_bind_only is
 set, and this is not a direct sysfs bind.
 
  if (ids) {
  ^^
  This is no more NULL, so enter inside the loop
 
  retval = -EINVAL;
  while (ids->vendor || ids->subvendor || ids->class_mask) {
  if (driver_data == ids->driver_data) {
  retval = 0;
  break;
  }
  ids++;
  }
  if (retval)   /* No match */
  return retval; ^ This is where it returns
  as -EINVAL
 
 Why wouldn't it have broken out of the loop earlier, since driver_data and
 ids->driver_data should both be zero?  I assume this is with a patch to do
 PCI_ANY_ID in vfio-pci.

Hmm, I am pretty sure I have seen that issue a few times (the command-line
output is below), but now I am not getting any error reported. The device is
still not binding to the driver, because of sysfs_bind_only as you mentioned
(I thought of this as a second issue). If I am able to reproduce the first
issue I will let you know; otherwise there was no first issue :(

root@p5040ds:/sys/bus/pci# echo :01:00.0 > devices/\:01\:00.0/driver/unbind
e1000e :01:00.0 eth0: removed PHC
root@p5040ds:/sys/bus/pci# echo 8086 10d3 > drivers/vfio-pci/new_id
-sh: echo: write error: Invalid argument
root@p5040ds:/sys/bus/pci# echo :01:00.0 > drivers/vfio-pci/bind

-Bharat

 
 -Scott
 



RE: [PATCH 2/7] Initial skeleton of VFIO support for Device Tree based devices

2013-10-29 Thread Bhushan Bharat-R65777


 -Original Message-
 From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf 
 Of
 Alex Williamson
 Sent: Tuesday, October 29, 2013 5:17 PM
 To: Don Dutile
 Cc: Bhushan Bharat-R65777; Antonios Motakis; kvm...@lists.cs.columbia.edu;
 linux-samsung-...@vger.kernel.org; kvm@vger.kernel.org; ag...@suse.de; Yoder
 Stuart-B08248; io...@lists.linux-foundation.org; t...@virtualopensystems.com
 Subject: Re: [PATCH 2/7] Initial skeleton of VFIO support for Device Tree 
 based
 devices
 
 On Mon, 2013-10-28 at 21:29 -0400, Don Dutile wrote:
  On 09/30/2013 11:37 AM, Bhushan Bharat-R65777 wrote:
  
  
   -Original Message-
   From: iommu-boun...@lists.linux-foundation.org [mailto:iommu-
   boun...@lists.linux-foundation.org] On Behalf Of Antonios Motakis
   Sent: Monday, September 30, 2013 8:59 PM
   To: kvm...@lists.cs.columbia.edu; alex.william...@redhat.com
   Cc: linux-samsung-...@vger.kernel.org; kvm@vger.kernel.org;
   ag...@suse.de; Yoder Stuart-B08248;
   io...@lists.linux-foundation.org; Antonios Motakis;
   t...@virtualopensystems.com
   Subject: [PATCH 2/7] Initial skeleton of VFIO support for Device
   Tree based devices
  
   Platform devices in the Linux kernel are usually managed by the DT
 interface.
   This patch forms the base to support these kind of devices with VFIO.
  
   Signed-off-by: Antonios Motakis <a.mota...@virtualopensystems.com>
   ---
 drivers/vfio/Kconfig |  11 +++
 drivers/vfio/Makefile|   1 +
 drivers/vfio/vfio_platform.c | 187
 +++
 include/uapi/linux/vfio.h|   1 +
 4 files changed, 200 insertions(+)
 create mode 100644 drivers/vfio/vfio_platform.c
  
   diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index
   1f84eda..35254b7
   100644
   --- a/drivers/vfio/Kconfig
   +++ b/drivers/vfio/Kconfig
   @@ -13,4 +13,15 @@ menuconfig VFIO
  
  If you don't know what to do here, say N.
  
   +config VFIO_PLATFORM
   +tristate "VFIO support for device tree based platform devices"
   +depends on VFIO && EVENTFD && OF
   +help
   +  Support for platform devices with VFIO. This is required to 
   make
   +  use of platform devices present on device tree nodes using 
   the VFIO
   +  framework. Devices that are not described in the device tree 
   cannot
   +  be used by this driver.
   +
   +  If you don't know what to do here, say N.
   +
 source "drivers/vfio/pci/Kconfig"
   diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index
   2398d4a..575c8dd 100644
   --- a/drivers/vfio/Makefile
   +++ b/drivers/vfio/Makefile
   @@ -1,3 +1,4 @@
 obj-$(CONFIG_VFIO) += vfio.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
 obj-$(CONFIG_VFIO_PCI) += pci/
   +obj-$(CONFIG_VFIO_PLATFORM) += vfio_platform.o
   diff --git a/drivers/vfio/vfio_platform.c
   b/drivers/vfio/vfio_platform.c new
  
   We can make this parallel to PCI, something like
   drivers/vfio/platform/platform.c
  
  pls, no.  'platform' is too generic, and it really means 'arm-dt' ...
  so can move it to the arch/arm space, and have its kconfig conditional on
 ARMVFIO.
  if kept under drivers/vfio, then use a better directory name that ties it to
 arm-dt.
  thanks.
 
 The intention is that vfio platform device support is not arm-dt specific.
 This is to be used by both arm and embedded ppc.  The devices we intend to
 support with them are known as platform drivers in the kernel, thus the name.
 I suppose the question remains whether the interface here is really generic
 for any platform device or whether we're making an interface specifically for
 device tree platform devices, or if those are one and the same.
 In any case, arm-dt is certainly not the answer.

I think we should stick to platform.

-Bharat

 
 Alex
 
   file mode 100644 index 000..b9686b0
   --- /dev/null
   +++ b/drivers/vfio/vfio_platform.c
   @@ -0,0 +1,187 @@
   +/*
   + * Copyright (C) 2013 - Virtual Open Systems
   + * Author: Antonios Motakisa.mota...@virtualopensystems.com
   + *
   + * This program is free software; you can redistribute it and/or
   +modify
   + * it under the terms of the GNU General Public License, version
   +2, as
   + * published by the Free Software Foundation.
   + *
   + * This program is distributed in the hope that it will be useful,
   + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   + * GNU General Public License for more details.
   + *
   + * You should have received a copy of the GNU General Public
   +License
   + * along with this program; if not, write to the Free Software
   + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
 USA.
   + */
   +
   +#include <linux/device.h>
   +#include <linux/eventfd.h>
   +#include <linux/interrupt.h>
   +#include <linux/iommu.h>

RE: [PATCH v9 01/13] KVM: PPC: POWERNV: move iommu_add_device earlier

2013-10-29 Thread Bhushan Bharat-R65777
Hi Alex,

It looks like this patch has not been picked up by anyone. Are you going to
pick it up? My vfio/iommu patches depend on this patch (I have already tested
it).

Thanks
-Bharat

 -Original Message-
 From: Linuxppc-dev [mailto:linuxppc-dev-
 bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Alexey
 Kardashevskiy
 Sent: Wednesday, August 28, 2013 2:08 PM
 To: linuxppc-...@lists.ozlabs.org
 Cc: kvm@vger.kernel.org; Gleb Natapov; Alexey Kardashevskiy; Alexander Graf;
 kvm-...@vger.kernel.org; linux-ker...@vger.kernel.org; linux...@kvack.org; 
 Paul
 Mackerras; Paolo Bonzini; David Gibson
 Subject: [PATCH v9 01/13] KVM: PPC: POWERNV: move iommu_add_device earlier
 
 The current implementation of IOMMU on sPAPR does not use iommu_ops and
 therefore does not call IOMMU API's bus_set_iommu() which
 1) sets iommu_ops for a bus
 2) registers a bus notifier
 Instead, PCI devices are added to IOMMU groups from
 subsys_initcall_sync(tce_iommu_init) which does basically the same thing 
 without
 using iommu_ops callbacks.
 
 However Freescale PAMU driver (https://lkml.org/lkml/2013/7/1/158)
 implements iommu_ops and when tce_iommu_init is called, every PCI device is
 already added to some group so there is a conflict.
 
 This patch does 2 things:
 1. removes the loop in which PCI devices were added to groups and adds 
 explicit
 iommu_add_device() calls to add devices as soon as they get the iommu_table
 pointer assigned to them.
 2. moves a bus notifier to powernv code in order to avoid conflict with the
 notifier from Freescale driver.
 
 iommu_add_device() and iommu_del_device() are public now.
 
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
 Changes:
 v8:
 * added the check for iommu_group!=NULL before removing device from a group as
 suggested by Wei Yang weiy...@linux.vnet.ibm.com
 
 v2:
 * added a helper - set_iommu_table_base_and_group - which does
 set_iommu_table_base() and iommu_add_device()
 ---
  arch/powerpc/include/asm/iommu.h|  9 +++
  arch/powerpc/kernel/iommu.c | 41 
 +++--
  arch/powerpc/platforms/powernv/pci-ioda.c   |  8 +++---
  arch/powerpc/platforms/powernv/pci-p5ioc2.c |  2 +-
  arch/powerpc/platforms/powernv/pci.c| 33 ++-
  arch/powerpc/platforms/pseries/iommu.c  |  8 +++---
  6 files changed, 55 insertions(+), 46 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/iommu.h 
 b/arch/powerpc/include/asm/iommu.h
 index c34656a..19ad77f 100644
 --- a/arch/powerpc/include/asm/iommu.h
 +++ b/arch/powerpc/include/asm/iommu.h
 @@ -103,6 +103,15 @@ extern struct iommu_table *iommu_init_table(struct
 iommu_table * tbl,
   int nid);
  extern void iommu_register_group(struct iommu_table *tbl,
int pci_domain_number, unsigned long pe_num);
 +extern int iommu_add_device(struct device *dev);
 +extern void iommu_del_device(struct device *dev);
 +
 +static inline void set_iommu_table_base_and_group(struct device *dev,
 +   void *base)
 +{
 + set_iommu_table_base(dev, base);
 + iommu_add_device(dev);
 +}
 
  extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
   struct scatterlist *sglist, int nelems, diff --git
 a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index
 b20ff17..15f8ca8 100644
 --- a/arch/powerpc/kernel/iommu.c
 +++ b/arch/powerpc/kernel/iommu.c
 @@ -1105,7 +1105,7 @@ void iommu_release_ownership(struct iommu_table *tbl)  }
 EXPORT_SYMBOL_GPL(iommu_release_ownership);
 
 -static int iommu_add_device(struct device *dev)
 +int iommu_add_device(struct device *dev)
  {
   struct iommu_table *tbl;
   int ret = 0;
 @@ -1134,46 +1134,13 @@ static int iommu_add_device(struct device *dev)
 
   return ret;
  }
 +EXPORT_SYMBOL_GPL(iommu_add_device);
 
 -static void iommu_del_device(struct device *dev)
 +void iommu_del_device(struct device *dev)
  {
   iommu_group_remove_device(dev);
  }
 -
 -static int iommu_bus_notifier(struct notifier_block *nb,
 -   unsigned long action, void *data)
 -{
 - struct device *dev = data;
 -
 - switch (action) {
 - case BUS_NOTIFY_ADD_DEVICE:
 - return iommu_add_device(dev);
 - case BUS_NOTIFY_DEL_DEVICE:
 - iommu_del_device(dev);
 - return 0;
 - default:
 - return 0;
 - }
 -}
 -
 -static struct notifier_block tce_iommu_bus_nb = {
 - .notifier_call = iommu_bus_notifier,
 -};
 -
 -static int __init tce_iommu_init(void)
 -{
 - struct pci_dev *pdev = NULL;
 -
 - BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE);
 -
 - for_each_pci_dev(pdev)
 - iommu_add_device(&pdev->dev);
 -
 - bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
 - return 0;
 -}
 -
 -subsys_initcall_sync(tce_iommu_init);
 +EXPORT_SYMBOL_GPL(iommu_del_device);
 
  

RE: [PATCH v9 01/13] KVM: PPC: POWERNV: move iommu_add_device earlier

2013-10-29 Thread Bhushan Bharat-R65777
Hi Alex,

It looks like this patch has not been picked up by anyone. Are you going to
pick it up? My vfio/iommu patches depend on this patch (I have already tested
it).

Thanks
-Bharat

 -Original Message-
 From: Linuxppc-dev [mailto:linuxppc-dev-
 bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Alexey
 Kardashevskiy
 Sent: Wednesday, August 28, 2013 2:08 PM
 To: linuxppc-...@lists.ozlabs.org
 Cc: k...@vger.kernel.org; Gleb Natapov; Alexey Kardashevskiy; Alexander Graf;
 kvm-ppc@vger.kernel.org; linux-ker...@vger.kernel.org; linux...@kvack.org; 
 Paul
 Mackerras; Paolo Bonzini; David Gibson
 Subject: [PATCH v9 01/13] KVM: PPC: POWERNV: move iommu_add_device earlier
 
 The current implementation of IOMMU on sPAPR does not use iommu_ops and
 therefore does not call IOMMU API's bus_set_iommu() which
 1) sets iommu_ops for a bus
 2) registers a bus notifier
 Instead, PCI devices are added to IOMMU groups from
 subsys_initcall_sync(tce_iommu_init) which does basically the same thing 
 without
 using iommu_ops callbacks.
 
 However Freescale PAMU driver (https://lkml.org/lkml/2013/7/1/158)
 implements iommu_ops and when tce_iommu_init is called, every PCI device is
 already added to some group so there is a conflict.
 
 This patch does 2 things:
 1. removes the loop in which PCI devices were added to groups and adds 
 explicit
 iommu_add_device() calls to add devices as soon as they get the iommu_table
 pointer assigned to them.
 2. moves a bus notifier to powernv code in order to avoid conflict with the
 notifier from Freescale driver.
 
 iommu_add_device() and iommu_del_device() are public now.
 
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
 Changes:
 v8:
 * added the check for iommu_group!=NULL before removing device from a group as
 suggested by Wei Yang weiy...@linux.vnet.ibm.com
 
 v2:
 * added a helper - set_iommu_table_base_and_group - which does
 set_iommu_table_base() and iommu_add_device()
 ---
  arch/powerpc/include/asm/iommu.h|  9 +++
  arch/powerpc/kernel/iommu.c | 41 
 +++--
  arch/powerpc/platforms/powernv/pci-ioda.c   |  8 +++---
  arch/powerpc/platforms/powernv/pci-p5ioc2.c |  2 +-
  arch/powerpc/platforms/powernv/pci.c| 33 ++-
  arch/powerpc/platforms/pseries/iommu.c  |  8 +++---
  6 files changed, 55 insertions(+), 46 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/iommu.h 
 b/arch/powerpc/include/asm/iommu.h
 index c34656a..19ad77f 100644
 --- a/arch/powerpc/include/asm/iommu.h
 +++ b/arch/powerpc/include/asm/iommu.h
 @@ -103,6 +103,15 @@ extern struct iommu_table *iommu_init_table(struct
 iommu_table * tbl,
   int nid);
  extern void iommu_register_group(struct iommu_table *tbl,
int pci_domain_number, unsigned long pe_num);
 +extern int iommu_add_device(struct device *dev); extern void
 +iommu_del_device(struct device *dev);
 +
 +static inline void set_iommu_table_base_and_group(struct device *dev,
 +   void *base)
 +{
 + set_iommu_table_base(dev, base);
 + iommu_add_device(dev);
 +}
 
  extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
   struct scatterlist *sglist, int nelems, diff --git
 a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index
 b20ff17..15f8ca8 100644
 --- a/arch/powerpc/kernel/iommu.c
 +++ b/arch/powerpc/kernel/iommu.c
 @@ -1105,7 +1105,7 @@ void iommu_release_ownership(struct iommu_table *tbl)  }
 EXPORT_SYMBOL_GPL(iommu_release_ownership);
 
 -static int iommu_add_device(struct device *dev)
 +int iommu_add_device(struct device *dev)
  {
   struct iommu_table *tbl;
   int ret = 0;
 @@ -1134,46 +1134,13 @@ static int iommu_add_device(struct device *dev)
 
   return ret;
  }
 +EXPORT_SYMBOL_GPL(iommu_add_device);
 
 -static void iommu_del_device(struct device *dev)
 +void iommu_del_device(struct device *dev)
  {
   iommu_group_remove_device(dev);
  }
 -
 -static int iommu_bus_notifier(struct notifier_block *nb,
 -   unsigned long action, void *data)
 -{
 - struct device *dev = data;
 -
 - switch (action) {
 - case BUS_NOTIFY_ADD_DEVICE:
 - return iommu_add_device(dev);
 - case BUS_NOTIFY_DEL_DEVICE:
 - iommu_del_device(dev);
 - return 0;
 - default:
 - return 0;
 - }
 -}
 -
 -static struct notifier_block tce_iommu_bus_nb = {
 - .notifier_call = iommu_bus_notifier,
 -};
 -
 -static int __init tce_iommu_init(void)
 -{
 - struct pci_dev *pdev = NULL;
 -
 - BUILD_BUG_ON(PAGE_SIZE  IOMMU_PAGE_SIZE);
 -
 - for_each_pci_dev(pdev)
 - iommu_add_device(pdev-dev);
 -
 - bus_register_notifier(pci_bus_type, tce_iommu_bus_nb);
 - return 0;
 -}
 -
 -subsys_initcall_sync(tce_iommu_init);
 +EXPORT_SYMBOL_GPL(iommu_del_device);
 
  

RE: [PATCH v9 01/13] KVM: PPC: POWERNV: move iommu_add_device earlier

2013-10-29 Thread Bhushan Bharat-R65777
Hi Alex,

It looks like this patch has not been picked up by anyone. Are you going to pick it up?
My vfio/iommu patches depend on this patch (I have already tested it).

Thanks
-Bharat

 -Original Message-
 From: Linuxppc-dev [mailto:linuxppc-dev-
 bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Alexey
 Kardashevskiy
 Sent: Wednesday, August 28, 2013 2:08 PM
 To: linuxppc-dev@lists.ozlabs.org
 Cc: k...@vger.kernel.org; Gleb Natapov; Alexey Kardashevskiy; Alexander Graf;
 kvm-...@vger.kernel.org; linux-ker...@vger.kernel.org; linux...@kvack.org; 
 Paul
 Mackerras; Paolo Bonzini; David Gibson
 Subject: [PATCH v9 01/13] KVM: PPC: POWERNV: move iommu_add_device earlier
 
 The current implementation of IOMMU on sPAPR does not use iommu_ops and
 therefore does not call IOMMU API's bus_set_iommu() which
 1) sets iommu_ops for a bus
 2) registers a bus notifier
 Instead, PCI devices are added to IOMMU groups from
 subsys_initcall_sync(tce_iommu_init) which does basically the same thing 
 without
 using iommu_ops callbacks.
 
 However Freescale PAMU driver (https://lkml.org/lkml/2013/7/1/158)
 implements iommu_ops and when tce_iommu_init is called, every PCI device is
 already added to some group so there is a conflict.
 
 This patch does 2 things:
 1. removes the loop in which PCI devices were added to groups and adds 
 explicit
 iommu_add_device() calls to add devices as soon as they get the iommu_table
 pointer assigned to them.
 2. moves a bus notifier to powernv code in order to avoid conflict with the
 notifier from Freescale driver.
 
 iommu_add_device() and iommu_del_device() are public now.
 
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
 Changes:
 v8:
 * added the check for iommu_group!=NULL before removing device from a group as
 suggested by Wei Yang weiy...@linux.vnet.ibm.com
 
 v2:
 * added a helper - set_iommu_table_base_and_group - which does
 set_iommu_table_base() and iommu_add_device()
 ---
  arch/powerpc/include/asm/iommu.h|  9 +++
  arch/powerpc/kernel/iommu.c | 41 
 +++--
  arch/powerpc/platforms/powernv/pci-ioda.c   |  8 +++---
  arch/powerpc/platforms/powernv/pci-p5ioc2.c |  2 +-
  arch/powerpc/platforms/powernv/pci.c| 33 ++-
  arch/powerpc/platforms/pseries/iommu.c  |  8 +++---
  6 files changed, 55 insertions(+), 46 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/iommu.h 
 b/arch/powerpc/include/asm/iommu.h
 index c34656a..19ad77f 100644
 --- a/arch/powerpc/include/asm/iommu.h
 +++ b/arch/powerpc/include/asm/iommu.h
 @@ -103,6 +103,15 @@ extern struct iommu_table *iommu_init_table(struct
 iommu_table * tbl,
   int nid);
  extern void iommu_register_group(struct iommu_table *tbl,
int pci_domain_number, unsigned long pe_num);
 +extern int iommu_add_device(struct device *dev); extern void
 +iommu_del_device(struct device *dev);
 +
 +static inline void set_iommu_table_base_and_group(struct device *dev,
 +   void *base)
 +{
 + set_iommu_table_base(dev, base);
 + iommu_add_device(dev);
 +}
 
  extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
   struct scatterlist *sglist, int nelems, diff --git
 a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index
 b20ff17..15f8ca8 100644
 --- a/arch/powerpc/kernel/iommu.c
 +++ b/arch/powerpc/kernel/iommu.c
 @@ -1105,7 +1105,7 @@ void iommu_release_ownership(struct iommu_table *tbl)  }
 EXPORT_SYMBOL_GPL(iommu_release_ownership);
 
 -static int iommu_add_device(struct device *dev)
 +int iommu_add_device(struct device *dev)
  {
   struct iommu_table *tbl;
   int ret = 0;
 @@ -1134,46 +1134,13 @@ static int iommu_add_device(struct device *dev)
 
   return ret;
  }
 +EXPORT_SYMBOL_GPL(iommu_add_device);
 
 -static void iommu_del_device(struct device *dev)
 +void iommu_del_device(struct device *dev)
  {
   iommu_group_remove_device(dev);
  }
 -
 -static int iommu_bus_notifier(struct notifier_block *nb,
 -   unsigned long action, void *data)
 -{
 - struct device *dev = data;
 -
 - switch (action) {
 - case BUS_NOTIFY_ADD_DEVICE:
 - return iommu_add_device(dev);
 - case BUS_NOTIFY_DEL_DEVICE:
 - iommu_del_device(dev);
 - return 0;
 - default:
 - return 0;
 - }
 -}
 -
 -static struct notifier_block tce_iommu_bus_nb = {
 - .notifier_call = iommu_bus_notifier,
 -};
 -
 -static int __init tce_iommu_init(void)
 -{
 - struct pci_dev *pdev = NULL;
 -
 - BUILD_BUG_ON(PAGE_SIZE  IOMMU_PAGE_SIZE);
 -
 - for_each_pci_dev(pdev)
 - iommu_add_device(pdev-dev);
 -
 - bus_register_notifier(pci_bus_type, tce_iommu_bus_nb);
 - return 0;
 -}
 -
 -subsys_initcall_sync(tce_iommu_init);
 +EXPORT_SYMBOL_GPL(iommu_del_device);
 
  

RE: [PATCH 2/7] Initial skeleton of VFIO support for Device Tree based devices

2013-10-29 Thread Bhushan Bharat-R65777


 -Original Message-
 From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf 
 Of
 Alex Williamson
 Sent: Tuesday, October 29, 2013 5:17 PM
 To: Don Dutile
 Cc: Bhushan Bharat-R65777; Antonios Motakis; kvm...@lists.cs.columbia.edu;
 linux-samsung-soc@vger.kernel.org; k...@vger.kernel.org; ag...@suse.de; Yoder
 Stuart-B08248; io...@lists.linux-foundation.org; t...@virtualopensystems.com
 Subject: Re: [PATCH 2/7] Initial skeleton of VFIO support for Device Tree 
 based
 devices
 
 On Mon, 2013-10-28 at 21:29 -0400, Don Dutile wrote:
  On 09/30/2013 11:37 AM, Bhushan Bharat-R65777 wrote:
  
  
   -Original Message-
   From: iommu-boun...@lists.linux-foundation.org [mailto:iommu-
   boun...@lists.linux-foundation.org] On Behalf Of Antonios Motakis
   Sent: Monday, September 30, 2013 8:59 PM
   To: kvm...@lists.cs.columbia.edu; alex.william...@redhat.com
   Cc: linux-samsung-soc@vger.kernel.org; k...@vger.kernel.org;
   ag...@suse.de; Yoder Stuart-B08248;
   io...@lists.linux-foundation.org; Antonios Motakis;
   t...@virtualopensystems.com
   Subject: [PATCH 2/7] Initial skeleton of VFIO support for Device
   Tree based devices
  
   Platform devices in the Linux kernel are usually managed by the DT
 interface.
   This patch forms the base to support these kind of devices with VFIO.
  
   Signed-off-by: Antonios Motakisa.mota...@virtualopensystems.com
   ---
 drivers/vfio/Kconfig |  11 +++
 drivers/vfio/Makefile|   1 +
 drivers/vfio/vfio_platform.c | 187
 +++
 include/uapi/linux/vfio.h|   1 +
 4 files changed, 200 insertions(+)
 create mode 100644 drivers/vfio/vfio_platform.c
  
   diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index
   1f84eda..35254b7
   100644
   --- a/drivers/vfio/Kconfig
   +++ b/drivers/vfio/Kconfig
   @@ -13,4 +13,15 @@ menuconfig VFIO
  
  If you don't know what to do here, say N.
  
   +config VFIO_PLATFORM
   +tristate VFIO support for device tree based platform devices
   +depends on VFIO  EVENTFD  OF
   +help
   +  Support for platform devices with VFIO. This is required to 
   make
   +  use of platform devices present on device tree nodes using 
   the VFIO
   +  framework. Devices that are not described in the device tree 
   cannot
   +  be used by this driver.
   +
   +  If you don't know what to do here, say N.
   +
 source drivers/vfio/pci/Kconfig
   diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index
   2398d4a..575c8dd 100644
   --- a/drivers/vfio/Makefile
   +++ b/drivers/vfio/Makefile
   @@ -1,3 +1,4 @@
 obj-$(CONFIG_VFIO) += vfio.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
 obj-$(CONFIG_VFIO_PCI) += pci/
   +obj-$(CONFIG_VFIO_PLATFORM) += vfio_platform.o
   diff --git a/drivers/vfio/vfio_platform.c
   b/drivers/vfio/vfio_platform.c new
  
   We can make this parallel to PCI, something like
   drivers/vfio/platform/platform.c
  
  pls, no.  'platform' is too generic, and it really means 'arm-dt' ...
  so can move it to the arch/arm space, and have it's kconfig conditional on
 ARMVFIO.
  if kept under drivers/vfio, then use a better directory name that ties it to
 arm-dt.
  thanks.
 
 The intention is that vfio platform device support is not arm-dt specific.  
 This
 is to be used by both arm and embedded ppc.  The devices we intend to support
 with them are known as platform drivers in the kernel, thus the name.  I 
 suppose
 the question remains whether the interface here is really generic for any
 platform device or whether we're making an interface
 specifically for device tree platform devices, or if those are one and the
 same.
 In any case, arm-dt is certainly not the answer.

I think we should stick to platform.

-Bharat

 
 Alex
 
   file mode 100644 index 000..b9686b0
   --- /dev/null
   +++ b/drivers/vfio/vfio_platform.c
   @@ -0,0 +1,187 @@
   +/*
   + * Copyright (C) 2013 - Virtual Open Systems
   + * Author: Antonios Motakisa.mota...@virtualopensystems.com
   + *
   + * This program is free software; you can redistribute it and/or
   +modify
   + * it under the terms of the GNU General Public License, version
   +2, as
   + * published by the Free Software Foundation.
   + *
   + * This program is distributed in the hope that it will be useful,
   + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   + * GNU General Public License for more details.
   + *
   + * You should have received a copy of the GNU General Public
   +License
   + * along with this program; if not, write to the Free Software
   + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
 USA.
   + */
   +
   +#includelinux/device.h
   +#includelinux/eventfd.h
   +#includelinux/interrupt.h
   +#includelinux/iommu.h

Re: [Qemu-devel] [PATCH] arm_gic: Keep track of GICD_CPENDR and GICD_SPENDR

2013-10-29 Thread Bhushan Bharat-R65777
Hi Christoffer,

Not related to the patch: for an edge-triggered interrupt, will setting a bit in
ICD_SPENDR generate an interrupt?

Thanks
-Bharat

 -Original Message-
 From: Christoffer Dall [mailto:christoffer.d...@linaro.org]
 Sent: Wednesday, October 23, 2013 8:57 PM
 To: peter.mayd...@linaro.org
 Cc: patc...@linaro.org; qemu-devel@nongnu.org; kvm...@lists.cs.columbia.edu
 Subject: [PATCH] arm_gic: Keep track of GICD_CPENDR and GICD_SPENDR
 
 If software writes to the ISPENDR and sets the pending state of a level-
 triggered interrupt, the falling edge of the hardware input must not clear the
 pending state.  Conversely, if software writes to the ICPENDR, the pending 
 state
 of a level-triggered interrupt should only be cleared if the hardware input is
 not asserted.
 
 This requires an extra state variable to keep track of software writes.
 
 Signed-off-by: Christoffer Dall christoffer.d...@linaro.org
 ---
  hw/intc/arm_gic.c| 20 +---
  hw/intc/arm_gic_common.c |  5 +++--
  hw/intc/gic_internal.h   |  4 
  3 files changed, 24 insertions(+), 5 deletions(-)
 
 diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c index d1ddac1..db54061 
 100644
 --- a/hw/intc/arm_gic.c
 +++ b/hw/intc/arm_gic.c
 @@ -101,6 +101,12 @@ static void gic_clear_pending(GICState *s, int irq, int 
 cm,
 uint8_t src)  {
  unsigned cpu;
 
 +/* If a level-triggered interrupt has been set to pending through the
 + * GICD_SPENDR, then a falling edge does not clear the pending state.
 + */
 +if (GIC_TEST_SW_PENDING(irq, cm))
 +return;
 +
  GIC_CLEAR_PENDING(irq, cm);
  if (irq  GIC_NR_SGIS) {
  cpu = (unsigned)ffs(cm) - 1;
 @@ -177,8 +183,9 @@ uint32_t gic_acknowledge_irq(GICState *s, int cpu)
  s-last_active[new_irq][cpu] = s-running_irq[cpu];
  /* Clear pending flags for both level and edge triggered interrupts.
 Level triggered IRQs will be reasserted once they become inactive.  */
 -gic_clear_pending(s, new_irq, GIC_TEST_MODEL(new_irq) ? ALL_CPU_MASK : 
 cm,
 -  GIC_SGI_SRC(new_irq, cpu));
 +cm = GIC_TEST_MODEL(new_irq) ? ALL_CPU_MASK : cm;
 +GIC_CLEAR_SW_PENDING(new_irq, cm);
 +gic_clear_pending(s, new_irq, cm, GIC_SGI_SRC(new_irq, cpu));
  gic_set_running_irq(s, cpu, new_irq);
  DPRINTF(ACK %d\n, new_irq);
  return new_irq;
 @@ -445,16 +452,23 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
  for (i = 0; i  8; i++) {
  if (value  (1  i)) {
  GIC_SET_PENDING(irq + i, GIC_TARGET(irq + i));
 +if (!GIC_TEST_TRIGGER(irq + i)) {
 +GIC_SET_SW_PENDING(irq + i, GIC_TARGET(irq + i));
 +}
  }
  }
  } else if (offset  0x300) {
 +int cm = (1  cpu);
  /* Interrupt Clear Pending.  */
  irq = (offset - 0x280) * 8 + GIC_BASE_IRQ;
  if (irq = s-num_irq)
  goto bad_reg;
  for (i = 0; i  8; i++, irq++) {
  if (irq  GIC_NR_SGIS  value  (1  i)) {
 -gic_clear_pending(s, irq, 1  cpu, 0);
 +GIC_CLEAR_SW_PENDING(irq, cm);
 +if (GIC_TEST_TRIGGER(irq + i) || !GIC_TEST_LEVEL(irq, cm)) {
 +GIC_CLEAR_PENDING(irq, cm);
 +}
  }
  }
  } else if (offset  0x400) {
 diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c index
 1d3b738..7f0615f 100644
 --- a/hw/intc/arm_gic_common.c
 +++ b/hw/intc/arm_gic_common.c
 @@ -43,11 +43,12 @@ static int gic_post_load(void *opaque, int version_id)
 
  static const VMStateDescription vmstate_gic_irq_state = {
  .name = arm_gic_irq_state,
 -.version_id = 1,
 -.minimum_version_id = 1,
 +.version_id = 2,
 +.minimum_version_id = 2,
  .fields = (VMStateField[]) {
  VMSTATE_UINT8(enabled, gic_irq_state),
  VMSTATE_UINT8(pending, gic_irq_state),
 +VMSTATE_UINT8(sw_pending, gic_irq_state),
  VMSTATE_UINT8(active, gic_irq_state),
  VMSTATE_UINT8(level, gic_irq_state),
  VMSTATE_BOOL(model, gic_irq_state), diff --git 
 a/hw/intc/gic_internal.h
 b/hw/intc/gic_internal.h index f9133b9..173c607 100644
 --- a/hw/intc/gic_internal.h
 +++ b/hw/intc/gic_internal.h
 @@ -43,6 +43,9 @@
  #define GIC_SET_PENDING(irq, cm) s-irq_state[irq].pending |= (cm)  #define
 GIC_CLEAR_PENDING(irq, cm) s-irq_state[irq].pending = ~(cm)  #define
 GIC_TEST_PENDING(irq, cm) ((s-irq_state[irq].pending  (cm)) != 0)
 +#define GIC_SET_SW_PENDING(irq, cm) s-irq_state[irq].sw_pending |=
 +(cm) #define GIC_CLEAR_SW_PENDING(irq, cm) s-irq_state[irq].sw_pending
 += ~(cm) #define GIC_TEST_SW_PENDING(irq, cm)
 +((s-irq_state[irq].sw_pending  (cm)) != 0)
  #define GIC_SET_ACTIVE(irq, cm) s-irq_state[irq].active |= (cm)  #define
 GIC_CLEAR_ACTIVE(irq, cm) s-irq_state[irq].active = ~(cm)  #define
 GIC_TEST_ACTIVE(irq, cm) ((s-irq_state[irq].active  (cm)) != 0) @@ 

RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-28 Thread Bhushan Bharat-R65777


> -Original Message-
> From: Wood Scott-B07421
> Sent: Tuesday, October 29, 2013 10:05 AM
> To: Bhushan Bharat-R65777
> Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder Stuart-B08248;
> christoffer.d...@linaro.org; linux-kernel@vger.kernel.org;
> a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
> peter.mayd...@linaro.org; santosh.shu...@linaro.org; k...@vger.kernel.org;
> gre...@linuxfoundation.org
> Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
> sysfs only
> 
> On Mon, 2013-10-28 at 23:31 -0500, Bhushan Bharat-R65777 wrote:
> >
> > > -Original Message-
> > > From: Wood Scott-B07421
> > > Sent: Tuesday, October 29, 2013 10:00 AM
> > > To: Bhushan Bharat-R65777
> > > Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder
> > > Stuart-B08248; christoffer.d...@linaro.org;
> > > linux-kernel@vger.kernel.org; a.mota...@virtualopensystems.com;
> > > ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
> > > santosh.shu...@linaro.org; k...@vger.kernel.org;
> > > gre...@linuxfoundation.org
> > > Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
> > > binding via sysfs only
> > >
> > > On Mon, 2013-10-28 at 22:52 -0500, Bhushan Bharat-R65777 wrote:
> > > >
> > > > > -Original Message-
> > > > > From: Wood Scott-B07421
> > > > > Sent: Tuesday, October 29, 2013 9:11 AM
> > > > > To: Bhushan Bharat-R65777
> > > > > Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder
> > > > > Stuart-B08248; christoffer.d...@linaro.org;
> > > > > linux-kernel@vger.kernel.org; a.mota...@virtualopensystems.com;
> > > > > ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
> > > > > santosh.shu...@linaro.org; k...@vger.kernel.org;
> > > > > gre...@linuxfoundation.org
> > > > > Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
> > > > > binding via sysfs only
> > > > >
> > > > > On Mon, 2013-10-28 at 22:38 -0500, Bhushan Bharat-R65777 wrote:
> > > > > >
> > > > > > > -Original Message-
> > > > > > > From: Wood Scott-B07421
> > > > > > > Sent: Monday, October 28, 2013 11:40 PM
> > > > > > > To: Alex Williamson
> > > > > > > Cc: Kim Phillips; Bhushan Bharat-R65777; Wood Scott-B07421;
> > > > > > > Yoder Stuart-B08248; christoffer.d...@linaro.org;
> > > > > > > linux-kernel@vger.kernel.org;
> > > > > > > a.mota...@virtualopensystems.com; ag...@suse.de; Sethi
> > > > > > > Varun-B16395; peter.mayd...@linaro.org;
> > > > > > > santosh.shu...@linaro.org; k...@vger.kernel.org;
> > > > > > > gre...@linuxfoundation.org
> > > > > > > Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for
> > > > > > > explicit binding via sysfs only
> > > > > > >
> > > > > > > On Mon, 2013-10-28 at 13:00 -0500, Scott Wood wrote:
> > > > > > > > On Mon, 2013-10-28 at 11:47 -0600, Alex Williamson wrote:
> > > > > > > > > On Fri, 2013-10-11 at 01:27 -0500, Kim Phillips wrote:
> > > > > > > > > > Force the vfio-pci driver to only be bound explicitly
> > > > > > > > > > > via sysfs to avoid conflicts with other drivers in the
> > > > > > > > > > event of a
> > > hotplug.
> > > > > > > > >
> > > > > > > > > We can't break userspace, so we can't disable the
> > > > > > > > > current method of binding devices to vfio-pci.  We can
> > > > > > > > > add a new method and perhaps deprecate the existing
> > > > > > > > > mechanism to be removed at some point in the future.
> > > > > > > > > Thanks,
> > > > > > > >
> > > > > > > > I thought the existing method involved using sysfs bind,
> > > > > > > > and this was just eliminating a race.  How does the bind
> > > > > > > > get triggered
> > > currently?
> > > > > > >
> > > > > > > OK, so it seems it's relying on the write to new_id calling
> > > driver_attach().
> > > > > > > Sigh.  I 

RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-28 Thread Bhushan Bharat-R65777


> -Original Message-
> From: Wood Scott-B07421
> Sent: Tuesday, October 29, 2013 10:00 AM
> To: Bhushan Bharat-R65777
> Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder Stuart-B08248;
> christoffer.d...@linaro.org; linux-kernel@vger.kernel.org;
> a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
> peter.mayd...@linaro.org; santosh.shu...@linaro.org; k...@vger.kernel.org;
> gre...@linuxfoundation.org
> Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
> sysfs only
> 
> On Mon, 2013-10-28 at 22:52 -0500, Bhushan Bharat-R65777 wrote:
> >
> > > -Original Message-
> > > From: Wood Scott-B07421
> > > Sent: Tuesday, October 29, 2013 9:11 AM
> > > To: Bhushan Bharat-R65777
> > > Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder
> > > Stuart-B08248; christoffer.d...@linaro.org;
> > > linux-kernel@vger.kernel.org; a.mota...@virtualopensystems.com;
> > > ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
> > > santosh.shu...@linaro.org; k...@vger.kernel.org;
> > > gre...@linuxfoundation.org
> > > Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
> > > binding via sysfs only
> > >
> > > On Mon, 2013-10-28 at 22:38 -0500, Bhushan Bharat-R65777 wrote:
> > > >
> > > > > -Original Message-
> > > > > From: Wood Scott-B07421
> > > > > Sent: Monday, October 28, 2013 11:40 PM
> > > > > To: Alex Williamson
> > > > > Cc: Kim Phillips; Bhushan Bharat-R65777; Wood Scott-B07421;
> > > > > Yoder Stuart-B08248; christoffer.d...@linaro.org;
> > > > > linux-kernel@vger.kernel.org; a.mota...@virtualopensystems.com;
> > > > > ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
> > > > > santosh.shu...@linaro.org; k...@vger.kernel.org;
> > > > > gre...@linuxfoundation.org
> > > > > Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
> > > > > binding via sysfs only
> > > > >
> > > > > On Mon, 2013-10-28 at 13:00 -0500, Scott Wood wrote:
> > > > > > On Mon, 2013-10-28 at 11:47 -0600, Alex Williamson wrote:
> > > > > > > On Fri, 2013-10-11 at 01:27 -0500, Kim Phillips wrote:
> > > > > > > > Force the vfio-pci driver to only be bound explicitly via
> > > > > > > > > sysfs to avoid conflicts with other drivers in the event of a
> hotplug.
> > > > > > >
> > > > > > > We can't break userspace, so we can't disable the current
> > > > > > > method of binding devices to vfio-pci.  We can add a new
> > > > > > > method and perhaps deprecate the existing mechanism to be
> > > > > > > removed at some point in the future.  Thanks,
> > > > > >
> > > > > > I thought the existing method involved using sysfs bind, and
> > > > > > this was just eliminating a race.  How does the bind get triggered
> currently?
> > > > >
> > > > > OK, so it seems it's relying on the write to new_id calling
> driver_attach().
> > > > > Sigh.  I guess we could make driver-sysfs-bind-only be settable
> > > > > via sysfs, and have new-userspace set both that and PCI_ANY_ID
> > > > > (or the specific ID if userspace
> > > > > prefers) via new_id.  The platform bus patches could continue as
> > > > > is, since there's no existing mechanism to break.
> > > >
> > > > What about changing the store_new_id() to bypass exact ids check
> > > > if driver
> > > have PCI_ANY_ID?
> > >
> > > I don't follow.
> >
> > store_new_id() function is defined as:
> >
> > static ssize_t store_new_id(struct device_driver *driver, const char
> > *buf, size_t count) {
> > struct pci_driver *pdrv = to_pci_driver(driver);
> > const struct pci_device_id *ids = pdrv->id_table;
> >
> > 
> > /* Only accept driver_data values that match an existing id_table
> >entry */
> > if (ids) {
> > retval = -EINVAL;
> > while (ids->vendor || ids->subvendor || ids->class_mask) {
> > if (driver_data == ids->driver_data) {
> > retval = 0;
> > break;
> > }
> > ids++;
> > }
> > if (retval) /* No match */
> > return retval;
> > }
> >
> > retval = pci_add_dynid(pdrv, vendor, device, subvendor, subdevice,
> >class, class_mask, driver_data); 
> >
> >
> > So when ids == NULL it does not check the vendor etc. and calls
> > pci_add_dynid(), which in turn calls driver_attach().
> >
> > If we change the above loop to also break if ids->vendor == PCI_ANY_ID &&
> > ids->subvendor == PCI_ANY_ID, then we will also call pci_add_dynid().
> 
> What problem are you trying to solve?

I want the new_id interface to continue working as before.

-Bharat

> 
> -Scott
> 

[unreadable mis-encoded mailing-list footer omitted]

RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-28 Thread Bhushan Bharat-R65777


> -Original Message-
> From: Wood Scott-B07421
> Sent: Tuesday, October 29, 2013 9:11 AM
> To: Bhushan Bharat-R65777
> Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder Stuart-B08248;
> christoffer.d...@linaro.org; linux-kernel@vger.kernel.org;
> a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
> peter.mayd...@linaro.org; santosh.shu...@linaro.org; k...@vger.kernel.org;
> gre...@linuxfoundation.org
> Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
> sysfs only
> 
> On Mon, 2013-10-28 at 22:38 -0500, Bhushan Bharat-R65777 wrote:
> >
> > > -Original Message-
> > > From: Wood Scott-B07421
> > > Sent: Monday, October 28, 2013 11:40 PM
> > > To: Alex Williamson
> > > Cc: Kim Phillips; Bhushan Bharat-R65777; Wood Scott-B07421; Yoder
> > > Stuart-B08248; christoffer.d...@linaro.org;
> > > linux-kernel@vger.kernel.org; a.mota...@virtualopensystems.com;
> > > ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
> > > santosh.shu...@linaro.org; k...@vger.kernel.org;
> > > gre...@linuxfoundation.org
> > > Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
> > > binding via sysfs only
> > >
> > > On Mon, 2013-10-28 at 13:00 -0500, Scott Wood wrote:
> > > > On Mon, 2013-10-28 at 11:47 -0600, Alex Williamson wrote:
> > > > > On Fri, 2013-10-11 at 01:27 -0500, Kim Phillips wrote:
> > > > > > Force the vfio-pci driver to only be bound explicitly via
> > > > > > > sysfs to avoid conflicts with other drivers in the event of a 
> > > > > > hotplug.
> > > > >
> > > > > We can't break userspace, so we can't disable the current method
> > > > > of binding devices to vfio-pci.  We can add a new method and
> > > > > perhaps deprecate the existing mechanism to be removed at some
> > > > > point in the future.  Thanks,
> > > >
> > > > I thought the existing method involved using sysfs bind, and this
> > > > was just eliminating a race.  How does the bind get triggered currently?
> > >
> > > OK, so it seems it's relying on the write to new_id calling 
> > > driver_attach().
> > > Sigh.  I guess we could make driver-sysfs-bind-only be settable via
> > > sysfs, and have new-userspace set both that and PCI_ANY_ID (or the
> > > specific ID if userspace
> > > prefers) via new_id.  The platform bus patches could continue as is,
> > > since there's no existing mechanism to break.
> >
> > What about changing the store_new_id() to bypass exact ids check if driver
> have PCI_ANY_ID?
> 
> I don't follow.

store_new_id() function is defined as:

static ssize_t store_new_id(struct device_driver *driver, const char *buf, 
size_t count)
{
struct pci_driver *pdrv = to_pci_driver(driver);
const struct pci_device_id *ids = pdrv->id_table;


/* Only accept driver_data values that match an existing id_table
   entry */
if (ids) {
retval = -EINVAL;
while (ids->vendor || ids->subvendor || ids->class_mask) {
if (driver_data == ids->driver_data) {
retval = 0;
break;
}
ids++;
}
if (retval) /* No match */
return retval;
}

retval = pci_add_dynid(pdrv, vendor, device, subvendor, subdevice,
   class, class_mask, driver_data);



So when ids == NULL it does not check the vendor etc. and calls pci_add_dynid(),
which in turn calls driver_attach().

If we change the above loop to also break if ids->vendor == PCI_ANY_ID &&
ids->subvendor == PCI_ANY_ID, then we will also call pci_add_dynid().

-Bharat


> 
> -Scott
> 



RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-28 Thread Bhushan Bharat-R65777


> -Original Message-
> From: Wood Scott-B07421
> Sent: Monday, October 28, 2013 11:40 PM
> To: Alex Williamson
> Cc: Kim Phillips; Bhushan Bharat-R65777; Wood Scott-B07421; Yoder 
> Stuart-B08248;
> christoffer.d...@linaro.org; linux-kernel@vger.kernel.org;
> a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
> peter.mayd...@linaro.org; santosh.shu...@linaro.org; k...@vger.kernel.org;
> gre...@linuxfoundation.org
> Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
> sysfs only
> 
> On Mon, 2013-10-28 at 13:00 -0500, Scott Wood wrote:
> > On Mon, 2013-10-28 at 11:47 -0600, Alex Williamson wrote:
> > > On Fri, 2013-10-11 at 01:27 -0500, Kim Phillips wrote:
> > > > Force the vfio-pci driver to only be bound explicitly via sysfs to
> > > > avoid conflicts with other drivers in the event of a hotplug.
> > >
> > > We can't break userspace, so we can't disable the current method of
> > > binding devices to vfio-pci.  We can add a new method and perhaps
> > > deprecate the existing mechanism to be removed at some point in the
> > > future.  Thanks,
> >
> > I thought the existing method involved using sysfs bind, and this was
> > just eliminating a race.  How does the bind get triggered currently?
> 
> OK, so it seems it's relying on the write to new_id calling driver_attach().
> Sigh.  I guess we could make driver-sysfs-bind-only be settable via sysfs, and
> have new-userspace set both that and PCI_ANY_ID (or the specific ID if 
> userspace
> prefers) via new_id.  The platform bus patches could continue as is, since
> there's no existing mechanism to break.

What about changing store_new_id() to bypass the exact-ID check if the driver
has PCI_ANY_ID?

-Bharat

> 
> -Scott
> 



RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-28 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Monday, October 28, 2013 11:40 PM
 To: Alex Williamson
 Cc: Kim Phillips; Bhushan Bharat-R65777; Wood Scott-B07421; Yoder 
 Stuart-B08248;
 christoffer.d...@linaro.org; linux-kernel@vger.kernel.org;
 a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
 peter.mayd...@linaro.org; santosh.shu...@linaro.org; k...@vger.kernel.org;
 gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
 sysfs only
 
 On Mon, 2013-10-28 at 13:00 -0500, Scott Wood wrote:
  On Mon, 2013-10-28 at 11:47 -0600, Alex Williamson wrote:
   On Fri, 2013-10-11 at 01:27 -0500, Kim Phillips wrote:
Force the vfio-pci driver to only be bound explicitly via sysfs to
avoid conflicts with other drivers in the event of a hotplug.
  
   We can't break userspace, so we can't disable the current method of
   binding devices to vfio-pci.  We can add a new method and perhaps
   deprecate the existing mechanism to be removed at some point in the
   future.  Thanks,
 
  I thought the existing method involved using sysfs bind, and this was
  just eliminating a race.  How does the bind get triggered currently?
 
 OK, so it seems it's relying on the write to new_id calling driver_attach().
 Sigh.  I guess we could make driver-sysfs-bind-only be settable via sysfs, and
 have new-userspace set both that and PCI_ANY_ID (or the specific ID if 
 userspace
 prefers) via new_id.  The platform bus patches could continue as is, since
 there's no existing mechanism to break.

What about changing store_new_id() to bypass the exact-ID check if the driver
has PCI_ANY_ID?

-Bharat

 
 -Scott
 



RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-28 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Tuesday, October 29, 2013 9:11 AM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder Stuart-B08248;
 christoffer.d...@linaro.org; linux-kernel@vger.kernel.org;
 a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
 peter.mayd...@linaro.org; santosh.shu...@linaro.org; k...@vger.kernel.org;
 gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
 sysfs only
 
 On Mon, 2013-10-28 at 22:38 -0500, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: Wood Scott-B07421
   Sent: Monday, October 28, 2013 11:40 PM
   To: Alex Williamson
   Cc: Kim Phillips; Bhushan Bharat-R65777; Wood Scott-B07421; Yoder
   Stuart-B08248; christoffer.d...@linaro.org;
   linux-kernel@vger.kernel.org; a.mota...@virtualopensystems.com;
   ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
   santosh.shu...@linaro.org; k...@vger.kernel.org;
   gre...@linuxfoundation.org
   Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
   binding via sysfs only
  
   On Mon, 2013-10-28 at 13:00 -0500, Scott Wood wrote:
On Mon, 2013-10-28 at 11:47 -0600, Alex Williamson wrote:
 On Fri, 2013-10-11 at 01:27 -0500, Kim Phillips wrote:
  Force the vfio-pci driver to only be bound explicitly via
  sysfs to avoid conflicts with other drivers in the event of a 
  hotplug.

 We can't break userspace, so we can't disable the current method
 of binding devices to vfio-pci.  We can add a new method and
 perhaps deprecate the existing mechanism to be removed at some
 point in the future.  Thanks,
   
I thought the existing method involved using sysfs bind, and this
was just eliminating a race.  How does the bind get triggered currently?
  
   OK, so it seems it's relying on the write to new_id calling 
   driver_attach().
   Sigh.  I guess we could make driver-sysfs-bind-only be settable via
   sysfs, and have new-userspace set both that and PCI_ANY_ID (or the
   specific ID if userspace
   prefers) via new_id.  The platform bus patches could continue as is,
   since there's no existing mechanism to break.
 
  What about changing the store_new_id() to bypass exact ids check if driver
 have PCI_ANY_ID?
 
 I don't follow.

store_new_id() function is defined as:

static ssize_t store_new_id(struct device_driver *driver, const char *buf, 
size_t count)
{
struct pci_driver *pdrv = to_pci_driver(driver);
const struct pci_device_id *ids = pdrv->id_table;

snip
/* Only accept driver_data values that match an existing id_table
   entry */
if (ids) {
retval = -EINVAL;
while (ids->vendor || ids->subvendor || ids->class_mask) {
if (driver_data == ids->driver_data) {
retval = 0;
break;
}
ids++;
}
if (retval) /* No match */
return retval;
}

retval = pci_add_dynid(pdrv, vendor, device, subvendor, subdevice,
   class, class_mask, driver_data);
snip


So when ids == NULL it does not check the vendor etc. and calls pci_add_dynid(),
which in turn calls driver_attach().

If we change the above loop to break if ids->vendor == PCI_ANY_ID &&
ids->subvendor == PCI_ANY_ID then we will also call pci_add_dynid().

-Bharat


 
 -Scott
 



RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-28 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Tuesday, October 29, 2013 10:00 AM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder Stuart-B08248;
 christoffer.d...@linaro.org; linux-kernel@vger.kernel.org;
 a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
 peter.mayd...@linaro.org; santosh.shu...@linaro.org; k...@vger.kernel.org;
 gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
 sysfs only
 
 On Mon, 2013-10-28 at 22:52 -0500, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: Wood Scott-B07421
   Sent: Tuesday, October 29, 2013 9:11 AM
   To: Bhushan Bharat-R65777
   Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder
   Stuart-B08248; christoffer.d...@linaro.org;
   linux-kernel@vger.kernel.org; a.mota...@virtualopensystems.com;
   ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
   santosh.shu...@linaro.org; k...@vger.kernel.org;
   gre...@linuxfoundation.org
   Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
   binding via sysfs only
  
   On Mon, 2013-10-28 at 22:38 -0500, Bhushan Bharat-R65777 wrote:
   
 -Original Message-
 From: Wood Scott-B07421
 Sent: Monday, October 28, 2013 11:40 PM
 To: Alex Williamson
 Cc: Kim Phillips; Bhushan Bharat-R65777; Wood Scott-B07421;
 Yoder Stuart-B08248; christoffer.d...@linaro.org;
 linux-kernel@vger.kernel.org; a.mota...@virtualopensystems.com;
 ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
 santosh.shu...@linaro.org; k...@vger.kernel.org;
 gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
 binding via sysfs only

 On Mon, 2013-10-28 at 13:00 -0500, Scott Wood wrote:
  On Mon, 2013-10-28 at 11:47 -0600, Alex Williamson wrote:
   On Fri, 2013-10-11 at 01:27 -0500, Kim Phillips wrote:
Force the vfio-pci driver to only be bound explicitly via
sysfs to avoid conflics with other drivers in the event of a
 hotplug.
  
   We can't break userspace, so we can't disable the current
   method of binding devices to vfio-pci.  We can add a new
   method and perhaps deprecate the existing mechanism to be
   removed at some point in the future.  Thanks,
 
  I thought the existing method involved using sysfs bind, and
  this was just eliminating a race.  How does the bind get triggered
 currently?

 OK, so it seems it's relying on the write to new_id calling
 driver_attach().
 Sigh.  I guess we could make driver-sysfs-bind-only be settable
 via sysfs, and have new-userspace set both that and PCI_ANY_ID
 (or the specific ID if userspace
 prefers) via new_id.  The platform bus patches could continue as
 is, since there's no existing mechanism to break.
   
What about changing the store_new_id() to bypass exact ids check
if driver
   have PCI_ANY_ID?
  
   I don't follow.
 
  store_new_id() function id defined as:
 
  static ssize_t store_new_id(struct device_driver *driver, const char
  *buf, size_t count) {
  struct pci_driver *pdrv = to_pci_driver(driver);
  const struct pci_device_id *ids = pdrv-id_table;
 
  snip
  /* Only accept driver_data values that match an existing id_table
 entry */
  if (ids) {
  retval = -EINVAL;
  while (ids-vendor || ids-subvendor || ids-class_mask) {
  if (driver_data == ids-driver_data) {
  retval = 0;
  break;
  }
  ids++;
  }
  if (retval) /* No match */
  return retval;
  }
 
  retval = pci_add_dynid(pdrv, vendor, device, subvendor, subdevice,
 class, class_mask, driver_data); snip
 
 
  So when ids == NULL it does not check of vendor etc and calls 
  pci_add_dynid()
 which in turn calls driver_attach().
 
  If we change the above loop to break if ids-vendor == PCI_ANY_ID  ids-
 subvendor == PCI_ANY_ID then also we will call pci_add_dyids().
 
 What problem are you trying to solve?

I want the new_id interface to continue working as before.

-Bharat

 
 -Scott
 

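--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/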

RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-28 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Tuesday, October 29, 2013 10:05 AM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder Stuart-B08248;
 christoffer.d...@linaro.org; linux-kernel@vger.kernel.org;
 a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
 peter.mayd...@linaro.org; santosh.shu...@linaro.org; k...@vger.kernel.org;
 gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
 sysfs only
 
 On Mon, 2013-10-28 at 23:31 -0500, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: Wood Scott-B07421
   Sent: Tuesday, October 29, 2013 10:00 AM
   To: Bhushan Bharat-R65777
   Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder
   Stuart-B08248; christoffer.d...@linaro.org;
   linux-kernel@vger.kernel.org; a.mota...@virtualopensystems.com;
   ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
   santosh.shu...@linaro.org; k...@vger.kernel.org;
   gre...@linuxfoundation.org
   Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
   binding via sysfs only
  
   On Mon, 2013-10-28 at 22:52 -0500, Bhushan Bharat-R65777 wrote:
   
 -Original Message-
 From: Wood Scott-B07421
 Sent: Tuesday, October 29, 2013 9:11 AM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder
 Stuart-B08248; christoffer.d...@linaro.org;
 linux-kernel@vger.kernel.org; a.mota...@virtualopensystems.com;
 ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
 santosh.shu...@linaro.org; k...@vger.kernel.org;
 gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
 binding via sysfs only

 On Mon, 2013-10-28 at 22:38 -0500, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: Wood Scott-B07421
   Sent: Monday, October 28, 2013 11:40 PM
   To: Alex Williamson
   Cc: Kim Phillips; Bhushan Bharat-R65777; Wood Scott-B07421;
   Yoder Stuart-B08248; christoffer.d...@linaro.org;
   linux-kernel@vger.kernel.org;
   a.mota...@virtualopensystems.com; ag...@suse.de; Sethi
   Varun-B16395; peter.mayd...@linaro.org;
   santosh.shu...@linaro.org; k...@vger.kernel.org;
   gre...@linuxfoundation.org
   Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for
   explicit binding via sysfs only
  
   On Mon, 2013-10-28 at 13:00 -0500, Scott Wood wrote:
On Mon, 2013-10-28 at 11:47 -0600, Alex Williamson wrote:
 On Fri, 2013-10-11 at 01:27 -0500, Kim Phillips wrote:
  Force the vfio-pci driver to only be bound explicitly
  via sysfs to avoid conflics with other drivers in the
  event of a
   hotplug.

 We can't break userspace, so we can't disable the
 current method of binding devices to vfio-pci.  We can
 add a new method and perhaps deprecate the existing
 mechanism to be removed at some point in the future.
 Thanks,
   
I thought the existing method involved using sysfs bind,
and this was just eliminating a race.  How does the bind
get triggered
   currently?
  
   OK, so it seems it's relying on the write to new_id calling
   driver_attach().
   Sigh.  I guess we could make driver-sysfs-bind-only be
   settable via sysfs, and have new-userspace set both that and
   PCI_ANY_ID (or the specific ID if userspace
   prefers) via new_id.  The platform bus patches could
   continue as is, since there's no existing mechanism to break.
 
  What about changing the store_new_id() to bypass exact ids
  check if driver
 have PCI_ANY_ID?

 I don't follow.
   
store_new_id() function id defined as:
   
static ssize_t store_new_id(struct device_driver *driver, const
char *buf, size_t count) {
struct pci_driver *pdrv = to_pci_driver(driver);
const struct pci_device_id *ids = pdrv-id_table;
   
snip
/* Only accept driver_data values that match an existing 
id_table
   entry */
if (ids) {
retval = -EINVAL;
while (ids-vendor || ids-subvendor || 
ids-class_mask) {
if (driver_data == ids-driver_data) {
retval = 0;
break;
}
ids++;
}
if (retval) /* No match */
return retval;
}
   
retval = pci_add_dynid(pdrv, vendor, device, subvendor, 
subdevice,
   class, class_mask, driver_data);
snip
   
   
So when ids == NULL it does not check of vendor etc and calls
pci_add_dynid()
   which in turn calls driver_attach

RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-28 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Monday, October 28, 2013 11:40 PM
 To: Alex Williamson
 Cc: Kim Phillips; Bhushan Bharat-R65777; Wood Scott-B07421; Yoder 
 Stuart-B08248;
 christoffer.d...@linaro.org; linux-ker...@vger.kernel.org;
 a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
 peter.mayd...@linaro.org; santosh.shu...@linaro.org; kvm@vger.kernel.org;
 gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
 sysfs only
 
 On Mon, 2013-10-28 at 13:00 -0500, Scott Wood wrote:
  On Mon, 2013-10-28 at 11:47 -0600, Alex Williamson wrote:
   On Fri, 2013-10-11 at 01:27 -0500, Kim Phillips wrote:
Force the vfio-pci driver to only be bound explicitly via sysfs to
avoid conflics with other drivers in the event of a hotplug.
  
   We can't break userspace, so we can't disable the current method of
   binding devices to vfio-pci.  We can add a new method and perhaps
   deprecate the existing mechanism to be removed at some point in the
   future.  Thanks,
 
  I thought the existing method involved using sysfs bind, and this was
  just eliminating a race.  How does the bind get triggered currently?
 
 OK, so it seems it's relying on the write to new_id calling driver_attach().
 Sigh.  I guess we could make driver-sysfs-bind-only be settable via sysfs, and
 have new-userspace set both that and PCI_ANY_ID (or the specific ID if 
 userspace
 prefers) via new_id.  The platform bus patches could continue as is, since
 there's no existing mechanism to break.

What about changing store_new_id() to bypass the exact ids check if the driver
has PCI_ANY_ID?

-Bharat

 
 -Scott
 



RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-28 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Tuesday, October 29, 2013 9:11 AM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder Stuart-B08248;
 christoffer.d...@linaro.org; linux-ker...@vger.kernel.org;
 a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
 peter.mayd...@linaro.org; santosh.shu...@linaro.org; kvm@vger.kernel.org;
 gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
 sysfs only
 
 On Mon, 2013-10-28 at 22:38 -0500, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: Wood Scott-B07421
   Sent: Monday, October 28, 2013 11:40 PM
   To: Alex Williamson
   Cc: Kim Phillips; Bhushan Bharat-R65777; Wood Scott-B07421; Yoder
   Stuart-B08248; christoffer.d...@linaro.org;
   linux-ker...@vger.kernel.org; a.mota...@virtualopensystems.com;
   ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
   santosh.shu...@linaro.org; kvm@vger.kernel.org;
   gre...@linuxfoundation.org
   Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
   binding via sysfs only
  
   On Mon, 2013-10-28 at 13:00 -0500, Scott Wood wrote:
On Mon, 2013-10-28 at 11:47 -0600, Alex Williamson wrote:
 On Fri, 2013-10-11 at 01:27 -0500, Kim Phillips wrote:
  Force the vfio-pci driver to only be bound explicitly via
  sysfs to avoid conflics with other drivers in the event of a 
  hotplug.

 We can't break userspace, so we can't disable the current method
 of binding devices to vfio-pci.  We can add a new method and
 perhaps deprecate the existing mechanism to be removed at some
 point in the future.  Thanks,
   
I thought the existing method involved using sysfs bind, and this
was just eliminating a race.  How does the bind get triggered currently?
  
   OK, so it seems it's relying on the write to new_id calling 
   driver_attach().
   Sigh.  I guess we could make driver-sysfs-bind-only be settable via
   sysfs, and have new-userspace set both that and PCI_ANY_ID (or the
   specific ID if userspace
   prefers) via new_id.  The platform bus patches could continue as is,
   since there's no existing mechanism to break.
 
  What about changing the store_new_id() to bypass exact ids check if driver
 have PCI_ANY_ID?
 
 I don't follow.

store_new_id() function is defined as:

static ssize_t store_new_id(struct device_driver *driver, const char *buf, 
size_t count)
{
struct pci_driver *pdrv = to_pci_driver(driver);
const struct pci_device_id *ids = pdrv-id_table;

snip
/* Only accept driver_data values that match an existing id_table
   entry */
if (ids) {
retval = -EINVAL;
while (ids-vendor || ids-subvendor || ids-class_mask) {
if (driver_data == ids-driver_data) {
retval = 0;
break;
}
ids++;
}
if (retval) /* No match */
return retval;
}

retval = pci_add_dynid(pdrv, vendor, device, subvendor, subdevice,
   class, class_mask, driver_data);
snip


So when ids == NULL it does not check the vendor etc. and calls pci_add_dynid(),
which in turn calls driver_attach().

If we change the above loop to break if ids->vendor == PCI_ANY_ID &&
ids->subvendor == PCI_ANY_ID then we will also call pci_add_dynid().

-Bharat


 
 -Scott
 



RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-28 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Tuesday, October 29, 2013 10:00 AM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder Stuart-B08248;
 christoffer.d...@linaro.org; linux-ker...@vger.kernel.org;
 a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
 peter.mayd...@linaro.org; santosh.shu...@linaro.org; kvm@vger.kernel.org;
 gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
 sysfs only
 
 On Mon, 2013-10-28 at 22:52 -0500, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: Wood Scott-B07421
   Sent: Tuesday, October 29, 2013 9:11 AM
   To: Bhushan Bharat-R65777
   Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder
   Stuart-B08248; christoffer.d...@linaro.org;
   linux-ker...@vger.kernel.org; a.mota...@virtualopensystems.com;
   ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
   santosh.shu...@linaro.org; kvm@vger.kernel.org;
   gre...@linuxfoundation.org
   Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
   binding via sysfs only
  
   On Mon, 2013-10-28 at 22:38 -0500, Bhushan Bharat-R65777 wrote:
   
 -Original Message-
 From: Wood Scott-B07421
 Sent: Monday, October 28, 2013 11:40 PM
 To: Alex Williamson
 Cc: Kim Phillips; Bhushan Bharat-R65777; Wood Scott-B07421;
 Yoder Stuart-B08248; christoffer.d...@linaro.org;
 linux-ker...@vger.kernel.org; a.mota...@virtualopensystems.com;
 ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
 santosh.shu...@linaro.org; kvm@vger.kernel.org;
 gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
 binding via sysfs only

 On Mon, 2013-10-28 at 13:00 -0500, Scott Wood wrote:
  On Mon, 2013-10-28 at 11:47 -0600, Alex Williamson wrote:
   On Fri, 2013-10-11 at 01:27 -0500, Kim Phillips wrote:
Force the vfio-pci driver to only be bound explicitly via
sysfs to avoid conflics with other drivers in the event of a
 hotplug.
  
   We can't break userspace, so we can't disable the current
   method of binding devices to vfio-pci.  We can add a new
   method and perhaps deprecate the existing mechanism to be
   removed at some point in the future.  Thanks,
 
  I thought the existing method involved using sysfs bind, and
  this was just eliminating a race.  How does the bind get triggered
 currently?

 OK, so it seems it's relying on the write to new_id calling
 driver_attach().
 Sigh.  I guess we could make driver-sysfs-bind-only be settable
 via sysfs, and have new-userspace set both that and PCI_ANY_ID
 (or the specific ID if userspace
 prefers) via new_id.  The platform bus patches could continue as
 is, since there's no existing mechanism to break.
   
What about changing the store_new_id() to bypass exact ids check
if driver
   have PCI_ANY_ID?
  
   I don't follow.
 
  store_new_id() function id defined as:
 
  static ssize_t store_new_id(struct device_driver *driver, const char
  *buf, size_t count) {
  struct pci_driver *pdrv = to_pci_driver(driver);
  const struct pci_device_id *ids = pdrv-id_table;
 
  snip
  /* Only accept driver_data values that match an existing id_table
 entry */
  if (ids) {
  retval = -EINVAL;
  while (ids-vendor || ids-subvendor || ids-class_mask) {
  if (driver_data == ids-driver_data) {
  retval = 0;
  break;
  }
  ids++;
  }
  if (retval) /* No match */
  return retval;
  }
 
  retval = pci_add_dynid(pdrv, vendor, device, subvendor, subdevice,
 class, class_mask, driver_data); snip
 
 
  So when ids == NULL it does not check of vendor etc and calls 
  pci_add_dynid()
 which in turn calls driver_attach().
 
  If we change the above loop to break if ids-vendor == PCI_ANY_ID  ids-
 subvendor == PCI_ANY_ID then also we will call pci_add_dyids().
 
 What problem are you trying to solve?

I want the new_id interface to continue working as before.

-Bharat

 
 -Scott
 

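--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html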

RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-28 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Tuesday, October 29, 2013 10:05 AM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder Stuart-B08248;
 christoffer.d...@linaro.org; linux-ker...@vger.kernel.org;
 a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
 peter.mayd...@linaro.org; santosh.shu...@linaro.org; kvm@vger.kernel.org;
 gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
 sysfs only
 
 On Mon, 2013-10-28 at 23:31 -0500, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: Wood Scott-B07421
   Sent: Tuesday, October 29, 2013 10:00 AM
   To: Bhushan Bharat-R65777
   Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder
   Stuart-B08248; christoffer.d...@linaro.org;
   linux-ker...@vger.kernel.org; a.mota...@virtualopensystems.com;
   ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
   santosh.shu...@linaro.org; kvm@vger.kernel.org;
   gre...@linuxfoundation.org
   Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
   binding via sysfs only
  
   On Mon, 2013-10-28 at 22:52 -0500, Bhushan Bharat-R65777 wrote:
   
 -Original Message-
 From: Wood Scott-B07421
 Sent: Tuesday, October 29, 2013 9:11 AM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; Alex Williamson; Kim Phillips; Yoder
 Stuart-B08248; christoffer.d...@linaro.org;
 linux-ker...@vger.kernel.org; a.mota...@virtualopensystems.com;
 ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
 santosh.shu...@linaro.org; kvm@vger.kernel.org;
 gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit
 binding via sysfs only

 On Mon, 2013-10-28 at 22:38 -0500, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: Wood Scott-B07421
   Sent: Monday, October 28, 2013 11:40 PM
   To: Alex Williamson
   Cc: Kim Phillips; Bhushan Bharat-R65777; Wood Scott-B07421;
   Yoder Stuart-B08248; christoffer.d...@linaro.org;
   linux-ker...@vger.kernel.org;
   a.mota...@virtualopensystems.com; ag...@suse.de; Sethi
   Varun-B16395; peter.mayd...@linaro.org;
   santosh.shu...@linaro.org; kvm@vger.kernel.org;
   gre...@linuxfoundation.org
   Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for
   explicit binding via sysfs only
  
   On Mon, 2013-10-28 at 13:00 -0500, Scott Wood wrote:
On Mon, 2013-10-28 at 11:47 -0600, Alex Williamson wrote:
 On Fri, 2013-10-11 at 01:27 -0500, Kim Phillips wrote:
  Force the vfio-pci driver to only be bound explicitly
  via sysfs to avoid conflics with other drivers in the
  event of a
   hotplug.

 We can't break userspace, so we can't disable the
 current method of binding devices to vfio-pci.  We can
 add a new method and perhaps deprecate the existing
 mechanism to be removed at some point in the future.
 Thanks,
   
I thought the existing method involved using sysfs bind,
and this was just eliminating a race.  How does the bind
get triggered
   currently?
  
   OK, so it seems it's relying on the write to new_id calling
   driver_attach().
   Sigh.  I guess we could make driver-sysfs-bind-only be
   settable via sysfs, and have new-userspace set both that and
   PCI_ANY_ID (or the specific ID if userspace
   prefers) via new_id.  The platform bus patches could
   continue as is, since there's no existing mechanism to break.
 
  What about changing the store_new_id() to bypass exact ids
  check if driver
 have PCI_ANY_ID?

 I don't follow.
   
store_new_id() function id defined as:
   
static ssize_t store_new_id(struct device_driver *driver, const
char *buf, size_t count) {
struct pci_driver *pdrv = to_pci_driver(driver);
const struct pci_device_id *ids = pdrv-id_table;
   
snip
/* Only accept driver_data values that match an existing 
id_table
   entry */
if (ids) {
retval = -EINVAL;
while (ids-vendor || ids-subvendor || 
ids-class_mask) {
if (driver_data == ids-driver_data) {
retval = 0;
break;
}
ids++;
}
if (retval) /* No match */
return retval;
}
   
retval = pci_add_dynid(pdrv, vendor, device, subvendor, 
subdevice,
   class, class_mask, driver_data);
snip
   
   
So when ids == NULL it does not check of vendor etc and calls
pci_add_dynid()
   which in turn calls driver_attach

RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-24 Thread Bhushan Bharat-R65777


> -----Original Message-----
> From: Kim Phillips [mailto:kim.phill...@linaro.org]
> Sent: Saturday, October 12, 2013 4:47 AM
> To: Wood Scott-B07421
> Cc: Bhushan Bharat-R65777; Wood Scott-B07421; Yoder Stuart-B08248;
> christoffer.d...@linaro.org; alex.william...@redhat.com; linux-
> ker...@vger.kernel.org; a.mota...@virtualopensystems.com; ag...@suse.de; Sethi
> Varun-B16395; peter.mayd...@linaro.org; santosh.shu...@linaro.org;
> k...@vger.kernel.org; gre...@linuxfoundation.org
> Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
> sysfs only
> 
> On Fri, 11 Oct 2013 15:43:40 -0500
> Scott Wood <scottw...@freescale.com> wrote:
> 
> > On Fri, 2013-10-11 at 01:27 -0500, Kim Phillips wrote:
> > > Force the vfio-pci driver to only be bound explicitly via sysfs to avoid
> > > conflicts with other drivers in the event of a hotplug.
> > >
> > > Signed-off-by: Kim Phillips <kim.phill...@linaro.org>
> > > ---
> > >  drivers/vfio/pci/vfio_pci.c | 3 +++
> > >  1 file changed, 3 insertions(+)
> > >
> > > diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> > > index 6ab71b9..bdd7833 100644
> > > --- a/drivers/vfio/pci/vfio_pci.c
> > > +++ b/drivers/vfio/pci/vfio_pci.c
> > > @@ -901,6 +901,9 @@ static struct pci_driver vfio_pci_driver = {
> > >   .probe  = vfio_pci_probe,
> > >   .remove = vfio_pci_remove,
> > >   .err_handler= &vfio_err_handlers,
> > > + .driver = {
> > > + .sysfs_bind_only = true,
> > > + },
> > >  };
> > >
> > >  static void __exit vfio_pci_cleanup(void)
> >
> > You also need to add a PCI_ANY_ID match in order to be able to get rid
> > of the new_id usage.
> 
> thanks - see below.
> 
> Can someone with a PCI bus test this?  Bharat?

Hello Kim,

I can test that we can get rid of new_id and use "bind" to bind the device to 
vfio_pci.

The other part -- generating a hotplug event, or reordering the driver
registration by tweaking the Makefile, in order to test the sysfs_bind_only
way of binding -- is not yet tested.


Thanks
-Bharat

> 
> Kim
> 
> From a8d6c12f2ec763c2ac7fd384a3397c370cc1b932 Mon Sep 17 00:00:00 2001
> From: Kim Phillips <kim.phill...@linaro.org>
> Date: Thu, 10 Oct 2013 22:16:34 -0500
> Subject: [PATCH 3/4 v2] VFIO: pci: amend vfio-pci for explicit binding via 
> sysfs
>  only
> 
> Force the vfio-pci driver to only be bound explicitly via sysfs to avoid
> conflicts with other drivers in the event of a hotplug.  Also replace
> the only dynamic ids assignment with a table with a single PCI_ANY_ID
> entry since writing the sysfs bind file without having to specify ids
> via the new_id file first should no longer be necessary.
> 
> Signed-off-by: Kim Phillips <kim.phill...@linaro.org>
> ---
>  drivers/vfio/pci/vfio_pci.c | 12 +++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index 6ab71b9..c5b434f 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -895,12 +895,22 @@ static struct pci_error_handlers vfio_err_handlers = {
>   .error_detected = vfio_pci_aer_err_detected,
>  };
> 
> +static DEFINE_PCI_DEVICE_TABLE(vfio_pci_id_table) = {
> +{ PCI_DEVICE(PCI_ANY_ID, PCI_ANY_ID) },
> +{ 0 }
> +};
> +
> +MODULE_DEVICE_TABLE(pci, vfio_pci_id_table);
> +
>  static struct pci_driver vfio_pci_driver = {
>   .name   = "vfio-pci",
> - .id_table   = NULL, /* only dynamic ids */
> + .id_table   = vfio_pci_id_table, /* no dynamic ids */
>   .probe  = vfio_pci_probe,
>   .remove = vfio_pci_remove,
>   .err_handler= &vfio_err_handlers,
> + .driver = {
> + .sysfs_bind_only = true, /* bind only via sysfs */
> + },
>  };
> 
>  static void __exit vfio_pci_cleanup(void)
> --
> 1.8.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-24 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Kim Phillips [mailto:kim.phill...@linaro.org]
 Sent: Saturday, October 12, 2013 4:47 AM
 To: Wood Scott-B07421
 Cc: Bhushan Bharat-R65777; Wood Scott-B07421; Yoder Stuart-B08248;
 christoffer.d...@linaro.org; alex.william...@redhat.com; linux-
 ker...@vger.kernel.org; a.mota...@virtualopensystems.com; ag...@suse.de; Sethi
 Varun-B16395; peter.mayd...@linaro.org; santosh.shu...@linaro.org;
 k...@vger.kernel.org; gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
 sysfs only
 
 On Fri, 11 Oct 2013 15:43:40 -0500
 Scott Wood scottw...@freescale.com wrote:
 
  On Fri, 2013-10-11 at 01:27 -0500, Kim Phillips wrote:
   Force the vfio-pci driver to only be bound explicitly via sysfs to avoid
   conflics with other drivers in the event of a hotplug.
  
   Signed-off-by: Kim Phillips kim.phill...@linaro.org
   ---
drivers/vfio/pci/vfio_pci.c | 3 +++
1 file changed, 3 insertions(+)
  
   diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
   index 6ab71b9..bdd7833 100644
   --- a/drivers/vfio/pci/vfio_pci.c
   +++ b/drivers/vfio/pci/vfio_pci.c
   @@ -901,6 +901,9 @@ static struct pci_driver vfio_pci_driver = {
 .probe  = vfio_pci_probe,
 .remove = vfio_pci_remove,
 .err_handler= &vfio_err_handlers,
   + .driver = {
   + .sysfs_bind_only = true,
   + },
};
  
static void __exit vfio_pci_cleanup(void)
 
  You also need to add a PCI_ANY_ID match in order to be able to get rid
  of the new_id usage.
 
 thanks - see below.
 
 Can someone with a PCI bus test this?  Bharat?

Hello Kim,

I can test that we can get rid of new_id and use bind to bind the device to 
vfio_pci.

Other thing is generating hotplug, or reorder the driver registration by 
tweaking Makefile to test sysfs_bind_only way to bind is not yet tested.


Thanks
-Bharat

 
 Kim
 
 From a8d6c12f2ec763c2ac7fd384a3397c370cc1b932 Mon Sep 17 00:00:00 2001
 From: Kim Phillips kim.phill...@linaro.org
 Date: Thu, 10 Oct 2013 22:16:34 -0500
 Subject: [PATCH 3/4 v2] VFIO: pci: amend vfio-pci for explicit binding via 
 sysfs
  only
 
 Force the vfio-pci driver to only be bound explicitly via sysfs to avoid
 conflics with other drivers in the event of a hotplug.  Also replace
 the only dynamic ids assignment with a table with a single PCI_ANY_ID
 entry since writing the sysfs bind file without having to specify ids
 via the new_id file first should no longer be necessary.
 
 Signed-off-by: Kim Phillips kim.phill...@linaro.org
 ---
  drivers/vfio/pci/vfio_pci.c | 12 +++-
  1 file changed, 11 insertions(+), 1 deletion(-)
 
 diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
 index 6ab71b9..c5b434f 100644
 --- a/drivers/vfio/pci/vfio_pci.c
 +++ b/drivers/vfio/pci/vfio_pci.c
 @@ -895,12 +895,22 @@ static struct pci_error_handlers vfio_err_handlers = {
   .error_detected = vfio_pci_aer_err_detected,
  };
 
 +static DEFINE_PCI_DEVICE_TABLE(vfio_pci_id_table) = {
 +{ PCI_DEVICE(PCI_ANY_ID, PCI_ANY_ID) },
 +{ 0 }
 +};
 +
 +MODULE_DEVICE_TABLE(pci, vfio_pci_id_table);
 +
  static struct pci_driver vfio_pci_driver = {
   .name   = "vfio-pci",
 - .id_table   = NULL, /* only dynamic ids */
 + .id_table   = vfio_pci_id_table, /* no dynamic ids */
   .probe  = vfio_pci_probe,
   .remove = vfio_pci_remove,
   .err_handler= vfio_err_handlers,
 + .driver = {
 + .sysfs_bind_only = true, /* bind only via sysfs */
 + },
  };
 
  static void __exit vfio_pci_cleanup(void)
 --
 1.8.4


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via sysfs only

2013-10-24 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Kim Phillips [mailto:kim.phill...@linaro.org]
 Sent: Saturday, October 12, 2013 4:47 AM
 To: Wood Scott-B07421
 Cc: Bhushan Bharat-R65777; Wood Scott-B07421; Yoder Stuart-B08248;
 christoffer.d...@linaro.org; alex.william...@redhat.com; linux-
 ker...@vger.kernel.org; a.mota...@virtualopensystems.com; ag...@suse.de; Sethi
 Varun-B16395; peter.mayd...@linaro.org; santosh.shu...@linaro.org;
 kvm@vger.kernel.org; gre...@linuxfoundation.org
 Subject: Re: [PATCH 3/4] VFIO: pci: amend vfio-pci for explicit binding via
 sysfs only
 
 On Fri, 11 Oct 2013 15:43:40 -0500
 Scott Wood scottw...@freescale.com wrote:
 
  On Fri, 2013-10-11 at 01:27 -0500, Kim Phillips wrote:
   Force the vfio-pci driver to only be bound explicitly via sysfs to avoid
   conflics with other drivers in the event of a hotplug.
  
   Signed-off-by: Kim Phillips kim.phill...@linaro.org
   ---
drivers/vfio/pci/vfio_pci.c | 3 +++
1 file changed, 3 insertions(+)
  
   diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
   index 6ab71b9..bdd7833 100644
   --- a/drivers/vfio/pci/vfio_pci.c
   +++ b/drivers/vfio/pci/vfio_pci.c
   @@ -901,6 +901,9 @@ static struct pci_driver vfio_pci_driver = {
 .probe  = vfio_pci_probe,
 .remove = vfio_pci_remove,
 .err_handler= vfio_err_handlers,
   + .driver = {
   + .sysfs_bind_only = true,
   + },
};
  
static void __exit vfio_pci_cleanup(void)
 
  You also need to add a PCI_ANY_ID match in order to be able to get rid
  of the new_id usage.
 
 thanks - see below.
 
 Can someone with a PCI bus test this?  Bharat?

Hello Kim,

I can test that we can get rid of new_id and use bind to bind the device to 
vfio_pci.

The other part -- generating a hotplug event, or reordering the driver
registration by tweaking the Makefile, to exercise the sysfs_bind_only way of
binding -- is not yet tested.


Thanks
-Bharat

 
 Kim
 
 From a8d6c12f2ec763c2ac7fd384a3397c370cc1b932 Mon Sep 17 00:00:00 2001
 From: Kim Phillips kim.phill...@linaro.org
 Date: Thu, 10 Oct 2013 22:16:34 -0500
 Subject: [PATCH 3/4 v2] VFIO: pci: amend vfio-pci for explicit binding via 
 sysfs
  only
 
 Force the vfio-pci driver to only be bound explicitly via sysfs to avoid
 conflicts with other drivers in the event of a hotplug.  Also replace
 the only dynamic ids assignment with a table with a single PCI_ANY_ID
 entry since writing the sysfs bind file without having to specify ids
 via the new_id file first should no longer be necessary.
 
 Signed-off-by: Kim Phillips kim.phill...@linaro.org
 ---
  drivers/vfio/pci/vfio_pci.c | 12 +++-
  1 file changed, 11 insertions(+), 1 deletion(-)
 
 diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
 index 6ab71b9..c5b434f 100644
 --- a/drivers/vfio/pci/vfio_pci.c
 +++ b/drivers/vfio/pci/vfio_pci.c
 @@ -895,12 +895,22 @@ static struct pci_error_handlers vfio_err_handlers = {
   .error_detected = vfio_pci_aer_err_detected,
  };
 
 +static DEFINE_PCI_DEVICE_TABLE(vfio_pci_id_table) = {
 +{ PCI_DEVICE(PCI_ANY_ID, PCI_ANY_ID) },
 +{ 0 }
 +};
 +
 +MODULE_DEVICE_TABLE(pci, vfio_pci_id_table);
 +
  static struct pci_driver vfio_pci_driver = {
   .name   = vfio-pci,
 - .id_table   = NULL, /* only dynamic ids */
 + .id_table   = vfio_pci_id_table, /* no dynamic ids */
   .probe  = vfio_pci_probe,
   .remove = vfio_pci_remove,
   .err_handler= vfio_err_handlers,
 + .driver = {
 + .sysfs_bind_only = true, /* bind only via sysfs */
 + },
  };
 
  static void __exit vfio_pci_cleanup(void)
 --
 1.8.4


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state and altivec idle

2013-10-18 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wang Dongsheng-B40534
 Sent: Friday, October 18, 2013 8:07 AM
 To: Wood Scott-B07421
 Cc: Bhushan Bharat-R65777; linuxppc-dev@lists.ozlabs.org
 Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state and altivec
 idle
 
 
 
  -Original Message-
  From: Wood Scott-B07421
  Sent: Friday, October 18, 2013 12:52 AM
  To: Wang Dongsheng-B40534
  Cc: Bhushan Bharat-R65777; Wood Scott-B07421; linuxppc-
  d...@lists.ozlabs.org
  Subject: Re: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state and
  altivec idle
 
  On Thu, 2013-10-17 at 00:51 -0500, Wang Dongsheng-B40534 wrote:
  
-Original Message-
From: Bhushan Bharat-R65777
Sent: Thursday, October 17, 2013 11:20 AM
To: Wang Dongsheng-B40534; Wood Scott-B07421
Cc: linuxppc-dev@lists.ozlabs.org
Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state
and altivec idle
   
   
   
 -Original Message-
 From: Wang Dongsheng-B40534
 Sent: Thursday, October 17, 2013 8:16 AM
 To: Bhushan Bharat-R65777; Wood Scott-B07421
 Cc: linuxppc-dev@lists.ozlabs.org
 Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20
 state and altivec idle



  -Original Message-
  From: Bhushan Bharat-R65777
  Sent: Thursday, October 17, 2013 1:01 AM
  To: Wang Dongsheng-B40534; Wood Scott-B07421
  Cc: linuxppc-dev@lists.ozlabs.org
  Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20
  state and altivec idle
 
 
 
   -Original Message-
   From: Wang Dongsheng-B40534
   Sent: Tuesday, October 15, 2013 2:51 PM
   To: Wood Scott-B07421
   Cc: Bhushan Bharat-R65777; linuxppc-dev@lists.ozlabs.org;
   Wang
  Dongsheng-B40534
   Subject: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20
   state and
  altivec idle
  
   +static ssize_t show_pw20_wait_time(struct device *dev,
   + struct device_attribute *attr, char 
   *buf) {
   + u32 value;
   + u64 tb_cycle;
   + s64 time;
   +
   + unsigned int cpu = dev->id;
   +
   + if (!pw20_wt) {
   + smp_call_function_single(cpu, do_show_pwrmgtcr0, &value,
1);
   + value = (value & PWRMGTCR0_PW20_ENT) >>
   + PWRMGTCR0_PW20_ENT_SHIFT;
   +
   + tb_cycle = (1 << (MAX_BIT - value)) * 2;
 
  Is value = 0 and value = 1 legal? These will make tb_cycle =
  0,
 
   + time = div_u64(tb_cycle * 1000, tb_ticks_per_usec) - 1;
 
  And time = -1;
 
 Please look at the end of the function, :)

 return sprintf(buf, "%llu\n", time > 0 ? time : 0);
   
I know you return 0 if value = 0/1; my question was whether this is
correct as per the specification.
   
Ahh, also for values up to 7 you will return 0, no?
   
   If value = 0, MAX_BIT - value = 63
   tb_cycle = 0x_,
 
  Actually, tb_cycle will be undefined because you shifted a 32-bit
  value
  (1) by more than 31 bits.  s/1/1ULL/
 

What Scott is saying is that left-shifting 1 by more than 31 bits is 
undefined.
Scott, this will be sign-extended, right?

-Bharat

 Actually, we have been discussing a situation that cannot happen.
 See the !pw20_wt branch: this branch reads the default wait bit.
 The default wait bit is 50, and the time is about 1ms.
 The default wait bit cannot be less than 50, which means the wait entry time
 cannot be greater than 1ms.
 We have already begun benchmark testing, and we have some preliminary results.
 Bits 55, 56 and 57 look good, but we need more benchmarks to choose the
 default bit.
 
   if (!pw20_wt) {
   smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
   value = (value & PWRMGTCR0_PW20_ENT) >>
   PWRMGTCR0_PW20_ENT_SHIFT;
 
   tb_cycle = (1 << (MAX_BIT - value)) * 2;
   time = div_u64(tb_cycle * 1000, tb_ticks_per_usec) - 1;
   } else {
   time = pw20_wt;
   }
 
 If it causes confusion, we can add a comment, as I discussed with Bharat.
 
   tb_cycle * 1000 would overflow, but this situation is not possible,
   because value = 0 means this feature is disabled.
   Currently the default wait bit is 50 (MAX_BIT - value, with value = 13), so
   the PW20/Altivec Idle wait entry time is about 1ms. That is already a very
   long idle wait time, and it cannot be increased (meaning (MAX_BIT - value)
   cannot be greater than 50).
 
  Why can it not be increased?
 
 see above, :)


 
 -dongsheng
  -Scott
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state and altivec idle

2013-10-18 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Saturday, October 19, 2013 12:52 AM
 To: Wang Dongsheng-B40534
 Cc: Bhushan Bharat-R65777; Wood Scott-B07421; linuxppc-dev@lists.ozlabs.org
 Subject: Re: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state and altivec
 idle
 
 On Thu, 2013-10-17 at 22:02 -0500, Wang Dongsheng-B40534 wrote:
 
   -Original Message-
   From: Bhushan Bharat-R65777
   Sent: Thursday, October 17, 2013 2:46 PM
   To: Wang Dongsheng-B40534; Wood Scott-B07421
   Cc: linuxppc-dev@lists.ozlabs.org
   Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state
   and altivec idle
  
  
  
  -Original Message-
  From: Wang Dongsheng-B40534
  Sent: Thursday, October 17, 2013 11:22 AM
  To: Bhushan Bharat-R65777; Wood Scott-B07421
  Cc: linuxppc-dev@lists.ozlabs.org
  Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20
  state and altivec idle
 
 
 
   -Original Message-
   From: Bhushan Bharat-R65777
   Sent: Thursday, October 17, 2013 11:20 AM
   To: Wang Dongsheng-B40534; Wood Scott-B07421
   Cc: linuxppc-dev@lists.ozlabs.org
   Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20
   state and altivec idle
  
  
  
-Original Message-
From: Wang Dongsheng-B40534
Sent: Thursday, October 17, 2013 8:16 AM
To: Bhushan Bharat-R65777; Wood Scott-B07421
Cc: linuxppc-dev@lists.ozlabs.org
Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for
pw20 state and altivec idle
   
   
   
 -Original Message-
 From: Bhushan Bharat-R65777
 Sent: Thursday, October 17, 2013 1:01 AM
 To: Wang Dongsheng-B40534; Wood Scott-B07421
 Cc: linuxppc-dev@lists.ozlabs.org
 Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for
 pw20 state and altivec idle



  -Original Message-
  From: Wang Dongsheng-B40534
  Sent: Tuesday, October 15, 2013 2:51 PM
  To: Wood Scott-B07421
  Cc: Bhushan Bharat-R65777;
  linuxppc-dev@lists.ozlabs.org; Wang
 Dongsheng-B40534
  Subject: [PATCH v5 4/4] powerpc/85xx: add sysfs for
  pw20 state and
 altivec idle
 
  From: Wang Dongsheng dongsheng.w...@freescale.com
 
  Add a sys interface to enable/disable pw20 state or
  altivec idle, and
 control the
  wait entry time.
 
  Enable/Disable interface:
  0, disable. 1, enable.
  /sys/devices/system/cpu/cpuX/pw20_state
  /sys/devices/system/cpu/cpuX/altivec_idle
 
  Set wait time interface:(Nanosecond)
  /sys/devices/system/cpu/cpuX/pw20_wait_time
  /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
  Example: Base on TBfreq is 41MHZ.
  1~48(ns): TB[63]
  49~97(ns): TB[62]
  98~195(ns): TB[61]
  196~390(ns): TB[60]
  391~780(ns): TB[59]
  781~1560(ns): TB[58]
  ...
 
  Signed-off-by: Wang Dongsheng
  dongsheng.w...@freescale.com
  ---
  *v5:
  Change get_idle_ticks_bit function implementation.
 
  *v4:
  Move code from 85xx/common.c to kernel/sysfs.c.
 
  Remove has_pw20_altivec_idle function.
 
  Change wait entry_bit to wait time.
 
  diff --git a/arch/powerpc/kernel/sysfs.c
  b/arch/powerpc/kernel/sysfs.c
 index
  27a90b9..10d1128 100644
  --- a/arch/powerpc/kernel/sysfs.c
  +++ b/arch/powerpc/kernel/sysfs.c
  @@ -85,6 +85,284 @@ __setup(smt-snooze-delay=,
 setup_smt_snooze_delay);
 
   #endif /* CONFIG_PPC64 */
 
  +#ifdef CONFIG_FSL_SOC
  +#define MAX_BIT63
  +
  +static u64 pw20_wt;
  +static u64 altivec_idle_wt;
  +
  +static unsigned int get_idle_ticks_bit(u64 ns) {
  +   u64 cycle;
  +
  +   if (ns = 1)
  +   cycle = div_u64(ns + 500, 1000) *
   tb_ticks_per_usec;
  +   else
  +   cycle = div_u64(ns * tb_ticks_per_usec, 1000);
  +
  +   if (!cycle)
  +   return 0;
  +
  +   return ilog2(cycle); }
  +
  +static void do_show_pwrmgtcr0(void *val) {
  +   u32 *value = val;
  +
  +   *value = mfspr(SPRN_PWRMGTCR0); }
  +
  +static ssize_t show_pw20_state(struct device *dev,
  +   struct device_attribute *attr, 
  char
   *buf) {
  +   u32 value;
  +   unsigned int cpu = dev-id;
  +
  +   smp_call_function_single(cpu, do_show_pwrmgtcr0

RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state and altivec idle

2013-10-17 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wang Dongsheng-B40534
 Sent: Thursday, October 17, 2013 11:22 AM
 To: Bhushan Bharat-R65777; Wood Scott-B07421
 Cc: linuxppc-dev@lists.ozlabs.org
 Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state and altivec
 idle
 
 
 
  -Original Message-
  From: Bhushan Bharat-R65777
  Sent: Thursday, October 17, 2013 11:20 AM
  To: Wang Dongsheng-B40534; Wood Scott-B07421
  Cc: linuxppc-dev@lists.ozlabs.org
  Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state and
  altivec idle
 
 
 
   -Original Message-
   From: Wang Dongsheng-B40534
   Sent: Thursday, October 17, 2013 8:16 AM
   To: Bhushan Bharat-R65777; Wood Scott-B07421
   Cc: linuxppc-dev@lists.ozlabs.org
   Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state
   and altivec idle
  
  
  
-Original Message-
From: Bhushan Bharat-R65777
Sent: Thursday, October 17, 2013 1:01 AM
To: Wang Dongsheng-B40534; Wood Scott-B07421
Cc: linuxppc-dev@lists.ozlabs.org
Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state
and altivec idle
   
   
   
 -Original Message-
 From: Wang Dongsheng-B40534
 Sent: Tuesday, October 15, 2013 2:51 PM
 To: Wood Scott-B07421
 Cc: Bhushan Bharat-R65777; linuxppc-dev@lists.ozlabs.org; Wang
Dongsheng-B40534
 Subject: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state
 and
altivec idle

 From: Wang Dongsheng dongsheng.w...@freescale.com

 Add a sys interface to enable/disable pw20 state or altivec idle,
 and
control the
 wait entry time.

 Enable/Disable interface:
 0, disable. 1, enable.
 /sys/devices/system/cpu/cpuX/pw20_state
 /sys/devices/system/cpu/cpuX/altivec_idle

 Set wait time interface:(Nanosecond)
 /sys/devices/system/cpu/cpuX/pw20_wait_time
 /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
 Example: Base on TBfreq is 41MHZ.
 1~48(ns): TB[63]
 49~97(ns): TB[62]
 98~195(ns): TB[61]
 196~390(ns): TB[60]
 391~780(ns): TB[59]
 781~1560(ns): TB[58]
 ...

 Signed-off-by: Wang Dongsheng dongsheng.w...@freescale.com
 ---
 *v5:
 Change get_idle_ticks_bit function implementation.

 *v4:
 Move code from 85xx/common.c to kernel/sysfs.c.

 Remove has_pw20_altivec_idle function.

 Change wait entry_bit to wait time.

 diff --git a/arch/powerpc/kernel/sysfs.c
 b/arch/powerpc/kernel/sysfs.c
index
 27a90b9..10d1128 100644
 --- a/arch/powerpc/kernel/sysfs.c
 +++ b/arch/powerpc/kernel/sysfs.c
 @@ -85,6 +85,284 @@ __setup(smt-snooze-delay=,
setup_smt_snooze_delay);

  #endif /* CONFIG_PPC64 */

 +#ifdef CONFIG_FSL_SOC
 +#define MAX_BIT  63
 +
 +static u64 pw20_wt;
 +static u64 altivec_idle_wt;
 +
 +static unsigned int get_idle_ticks_bit(u64 ns) {
 + u64 cycle;
 +
 + if (ns = 1)
 + cycle = div_u64(ns + 500, 1000) * tb_ticks_per_usec;
 + else
 + cycle = div_u64(ns * tb_ticks_per_usec, 1000);
 +
 + if (!cycle)
 + return 0;
 +
 + return ilog2(cycle);
 +}
 +
 +static void do_show_pwrmgtcr0(void *val) {
 + u32 *value = val;
 +
 + *value = mfspr(SPRN_PWRMGTCR0); }
 +
 +static ssize_t show_pw20_state(struct device *dev,
 + struct device_attribute *attr, char 
 *buf) {
 + u32 value;
 + unsigned int cpu = dev-id;
 +
 + smp_call_function_single(cpu, do_show_pwrmgtcr0, value, 1);
 +
 + value = PWRMGTCR0_PW20_WAIT;
 +
 + return sprintf(buf, %u\n, value ? 1 : 0); }
 +
 +static void do_store_pw20_state(void *val) {
 + u32 *value = val;
 + u32 pw20_state;
 +
 + pw20_state = mfspr(SPRN_PWRMGTCR0);
 +
 + if (*value)
 + pw20_state |= PWRMGTCR0_PW20_WAIT;
 + else
 + pw20_state = ~PWRMGTCR0_PW20_WAIT;
 +
 + mtspr(SPRN_PWRMGTCR0, pw20_state); }
 +
 +static ssize_t store_pw20_state(struct device *dev,
 + struct device_attribute *attr,
 + const char *buf, size_t count) {
 + u32 value;
 + unsigned int cpu = dev-id;
 +
 + if (kstrtou32(buf, 0, value))
 + return -EINVAL;
 +
 + if (value  1)
 + return -EINVAL;
 +
 + smp_call_function_single(cpu, do_store_pw20_state, value, 1);
 +
 + return count;
 +}
 +
 +static ssize_t show_pw20_wait_time(struct device *dev,
 + struct device_attribute *attr, char 
 *buf) {
 + u32 value;
 + u64 tb_cycle;
 + s64 time;
 +
 + unsigned int

RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state and altivec idle

2013-10-17 Thread Bhushan Bharat-R65777


   -Original Message-
   From: Wang Dongsheng-B40534
   Sent: Thursday, October 17, 2013 11:22 AM
   To: Bhushan Bharat-R65777; Wood Scott-B07421
   Cc: linuxppc-dev@lists.ozlabs.org
   Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state
   and altivec idle
  
  
  
-Original Message-
From: Bhushan Bharat-R65777
Sent: Thursday, October 17, 2013 11:20 AM
To: Wang Dongsheng-B40534; Wood Scott-B07421
Cc: linuxppc-dev@lists.ozlabs.org
Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state
and altivec idle
   
   
   
 -Original Message-
 From: Wang Dongsheng-B40534
 Sent: Thursday, October 17, 2013 8:16 AM
 To: Bhushan Bharat-R65777; Wood Scott-B07421
 Cc: linuxppc-dev@lists.ozlabs.org
 Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20
 state and altivec idle



  -Original Message-
  From: Bhushan Bharat-R65777
  Sent: Thursday, October 17, 2013 1:01 AM
  To: Wang Dongsheng-B40534; Wood Scott-B07421
  Cc: linuxppc-dev@lists.ozlabs.org
  Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20
  state and altivec idle
 
 
 
   -Original Message-
   From: Wang Dongsheng-B40534
   Sent: Tuesday, October 15, 2013 2:51 PM
   To: Wood Scott-B07421
   Cc: Bhushan Bharat-R65777; linuxppc-dev@lists.ozlabs.org;
   Wang
  Dongsheng-B40534
   Subject: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20
   state and
  altivec idle
  
   From: Wang Dongsheng dongsheng.w...@freescale.com
  
   Add a sys interface to enable/disable pw20 state or altivec
   idle, and
  control the
   wait entry time.
  
   Enable/Disable interface:
   0, disable. 1, enable.
   /sys/devices/system/cpu/cpuX/pw20_state
   /sys/devices/system/cpu/cpuX/altivec_idle
  
   Set wait time interface:(Nanosecond)
   /sys/devices/system/cpu/cpuX/pw20_wait_time
   /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
   Example: Base on TBfreq is 41MHZ.
   1~48(ns): TB[63]
   49~97(ns): TB[62]
   98~195(ns): TB[61]
   196~390(ns): TB[60]
   391~780(ns): TB[59]
   781~1560(ns): TB[58]
   ...
  
   Signed-off-by: Wang Dongsheng dongsheng.w...@freescale.com
   ---
   *v5:
   Change get_idle_ticks_bit function implementation.
  
   *v4:
   Move code from 85xx/common.c to kernel/sysfs.c.
  
   Remove has_pw20_altivec_idle function.
  
   Change wait entry_bit to wait time.
  
   diff --git a/arch/powerpc/kernel/sysfs.c
   b/arch/powerpc/kernel/sysfs.c
  index
   27a90b9..10d1128 100644
   --- a/arch/powerpc/kernel/sysfs.c
   +++ b/arch/powerpc/kernel/sysfs.c
   @@ -85,6 +85,284 @@ __setup(smt-snooze-delay=,
  setup_smt_snooze_delay);
  
#endif /* CONFIG_PPC64 */
  
   +#ifdef CONFIG_FSL_SOC
   +#define MAX_BIT  63
   +
   +static u64 pw20_wt;
   +static u64 altivec_idle_wt;
   +
   +static unsigned int get_idle_ticks_bit(u64 ns) {
   + u64 cycle;
   +
   + if (ns = 1)
   + cycle = div_u64(ns + 500, 1000) * tb_ticks_per_usec;
   + else
   + cycle = div_u64(ns * tb_ticks_per_usec, 1000);
   +
   + if (!cycle)
   + return 0;
   +
   + return ilog2(cycle);
   +}
   +
   +static void do_show_pwrmgtcr0(void *val) {
   + u32 *value = val;
   +
   + *value = mfspr(SPRN_PWRMGTCR0); }
   +
   +static ssize_t show_pw20_state(struct device *dev,
   + struct device_attribute *attr, char 
   *buf) {
   + u32 value;
   + unsigned int cpu = dev-id;
   +
   + smp_call_function_single(cpu, do_show_pwrmgtcr0, value,
   +1);
   +
   + value = PWRMGTCR0_PW20_WAIT;
   +
   + return sprintf(buf, %u\n, value ? 1 : 0); }
   +
   +static void do_store_pw20_state(void *val) {
   + u32 *value = val;
   + u32 pw20_state;
   +
   + pw20_state = mfspr(SPRN_PWRMGTCR0);
   +
   + if (*value)
   + pw20_state |= PWRMGTCR0_PW20_WAIT;
   + else
   + pw20_state = ~PWRMGTCR0_PW20_WAIT;
   +
   + mtspr(SPRN_PWRMGTCR0, pw20_state); }
   +
   +static ssize_t store_pw20_state(struct device *dev,
   + struct device_attribute *attr,
   + const char *buf, size_t count) {
   + u32 value;
   + unsigned int cpu = dev-id;
   +
   + if (kstrtou32(buf, 0, value))
   + return -EINVAL;
   +
   + if (value  1)
   + return -EINVAL;
   +
   + smp_call_function_single(cpu, do_store_pw20_state, value,
   +1);
   +
   + return count;
   +}
   +
   +static ssize_t

RE: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices

2013-10-16 Thread Bhushan Bharat-R65777

> >
> > > >
> > > >
> > > > > -Original Message-
> > > > > From: Sethi Varun-B16395
> > > > > Sent: Wednesday, October 16, 2013 4:53 PM
> > > > > To: j...@8bytes.org; io...@lists.linux-foundation.org; linuxppc-
> > > > > d...@lists.ozlabs.org; linux-kernel@vger.kernel.org; Yoder
> > > > > Stuart-B08248; Wood Scott-B07421; alex.william...@redhat.com;
> > > > > Bhushan
> > > > > Bharat-R65777
> > > > > Cc: Sethi Varun-B16395
> > > > > Subject: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for
> > > > > PCIe devices
> > > > >
> > > > > Once the PCIe device assigned to a guest VM (via VFIO) gets
> > > > > detached from the iommu domain (when guest terminates), its PAMU
> > > > > table entry is disabled. So, this would prevent the device from
> > > > > being used once it's
> > > > assigned back to the host.
> > > > >
> > > > > This patch allows for creation of a default DMA window
> > > > > corresponding to the device and subsequently enabling the PAMU
> > > > > table entry. Before we enable the entry, we ensure that the
> > > > > device's bus master capability is disabled (device quiesced).
> > > > >
> > > > > Signed-off-by: Varun Sethi 
> > > > > ---
> > > > >  drivers/iommu/fsl_pamu.c|   43
> > ---
> > > > -
> > > > >  drivers/iommu/fsl_pamu.h|1 +
> > > > >  drivers/iommu/fsl_pamu_domain.c |   46
> > > > ---
> > > > >  3 files changed, 78 insertions(+), 12 deletions(-)
> > > > >
> > > > > diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
> > > > > index
> > > > > cba0498..fb4a031 100644
> > > > > --- a/drivers/iommu/fsl_pamu.c
> > > > > +++ b/drivers/iommu/fsl_pamu.c
> > > > > @@ -225,6 +225,21 @@ static struct paace *pamu_get_spaace(struct
> > > > > paace *paace,
> > > > > u32 wnum)
> > > > >   return spaace;
> > > > >  }
> > > > >
> > > > > +/*
> > > > > + * Defaul PPAACE settings for an LIODN.
> > > > > + */
> > > > > +static void setup_default_ppaace(struct paace *ppaace) {
> > > > > + pamu_init_ppaace(ppaace);
> > > > > + /* window size is 2^(WSE+1) bytes */
> > > > > + set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, 35);
> > > > > + ppaace->wbah = 0;
> > > > > + set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0);
> > > > > + set_bf(ppaace->impl_attr, PAACE_IA_ATM,
> > > > > + PAACE_ATM_NO_XLATE);
> > > > > + set_bf(ppaace->addr_bitfields, PAACE_AF_AP,
> > > > > + PAACE_AP_PERMS_ALL);
> > > > > +}
> > > > >  /**
> > > > >   * pamu_get_fspi_and_allocate() - Allocates fspi index and
> > > > > reserves
> > > > subwindows
> > > > >   *required for primary PAACE in
> > the
> > > > secondary
> > > > > @@ -253,6 +268,24 @@ static unsigned long
> > > > > pamu_get_fspi_and_allocate(u32
> > > > > subwin_cnt)
> > > > >   return (spaace_addr - (unsigned long)spaact) / (sizeof(struct
> > > > > paace));  }
> > > > >
> > > > > +/* Reset the PAACE entry to the default state */ void
> > > > > +enable_default_dma_window(int liodn) {
> > > > > + struct paace *ppaace;
> > > > > +
> > > > > + ppaace = pamu_get_ppaace(liodn);
> > > > > + if (!ppaace) {
> > > > > + pr_debug("Invalid liodn entry\n");
> > > > > + return;
> > > > > + }
> > > > > +
> > > > > + memset(ppaace, 0, sizeof(struct paace));
> > > > > +
> > > > > + setup_default_ppaace(ppaace);
> > > > > + mb();
> > > > > + pamu_enable_liodn(liodn);
> > > > > +}
> > > > > +
> > > > >  /* Release the subwindows reserved for a particular LIODN */
> > > > > void

RE: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices

2013-10-16 Thread Bhushan Bharat-R65777


> >
> >
> > > -Original Message-
> > > From: Sethi Varun-B16395
> > > Sent: Wednesday, October 16, 2013 4:53 PM
> > > To: j...@8bytes.org; io...@lists.linux-foundation.org; linuxppc-
> > > d...@lists.ozlabs.org; linux-kernel@vger.kernel.org; Yoder
> > > Stuart-B08248; Wood Scott-B07421; alex.william...@redhat.com;
> > > Bhushan
> > > Bharat-R65777
> > > Cc: Sethi Varun-B16395
> > > Subject: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for
> > > PCIe devices
> > >
> > > Once the PCIe device assigned to a guest VM (via VFIO) gets detached
> > > from the iommu domain (when guest terminates), its PAMU table entry
> > > is disabled. So, this would prevent the device from being used once
> > > it's
> > assigned back to the host.
> > >
> > > This patch allows for creation of a default DMA window corresponding
> > > to the device and subsequently enabling the PAMU table entry. Before
> > > we enable the entry, we ensure that the device's bus master
> > > capability is disabled (device quiesced).
> > >
> > > Signed-off-by: Varun Sethi 
> > > ---
> > >  drivers/iommu/fsl_pamu.c|   43 ---
> > -
> > >  drivers/iommu/fsl_pamu.h|1 +
> > >  drivers/iommu/fsl_pamu_domain.c |   46
> > ---
> > >  3 files changed, 78 insertions(+), 12 deletions(-)
> > >
> > > diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
> > > index
> > > cba0498..fb4a031 100644
> > > --- a/drivers/iommu/fsl_pamu.c
> > > +++ b/drivers/iommu/fsl_pamu.c
> > > @@ -225,6 +225,21 @@ static struct paace *pamu_get_spaace(struct
> > > paace *paace,
> > > u32 wnum)
> > >   return spaace;
> > >  }
> > >
> > > +/*
> > > + * Defaul PPAACE settings for an LIODN.
> > > + */
> > > +static void setup_default_ppaace(struct paace *ppaace) {
> > > + pamu_init_ppaace(ppaace);
> > > + /* window size is 2^(WSE+1) bytes */
> > > + set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, 35);
> > > + ppaace->wbah = 0;
> > > + set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0);
> > > + set_bf(ppaace->impl_attr, PAACE_IA_ATM,
> > > + PAACE_ATM_NO_XLATE);
> > > + set_bf(ppaace->addr_bitfields, PAACE_AF_AP,
> > > + PAACE_AP_PERMS_ALL);
> > > +}
> > >  /**
> > >   * pamu_get_fspi_and_allocate() - Allocates fspi index and reserves
> > subwindows
> > >   *required for primary PAACE in the
> > secondary
> > > @@ -253,6 +268,24 @@ static unsigned long
> > > pamu_get_fspi_and_allocate(u32
> > > subwin_cnt)
> > >   return (spaace_addr - (unsigned long)spaact) / (sizeof(struct
> > > paace));  }
> > >
> > > +/* Reset the PAACE entry to the default state */ void
> > > +enable_default_dma_window(int liodn) {
> > > + struct paace *ppaace;
> > > +
> > > + ppaace = pamu_get_ppaace(liodn);
> > > + if (!ppaace) {
> > > + pr_debug("Invalid liodn entry\n");
> > > + return;
> > > + }
> > > +
> > > + memset(ppaace, 0, sizeof(struct paace));
> > > +
> > > + setup_default_ppaace(ppaace);
> > > + mb();
> > > + pamu_enable_liodn(liodn);
> > > +}
> > > +
> > >  /* Release the subwindows reserved for a particular LIODN */  void
> > > pamu_free_subwins(int liodn)  { @@ -752,15 +785,7 @@ static void
> > > __init
> > > setup_liodns(void)
> > >   continue;
> > >   }
> > >   ppaace = pamu_get_ppaace(liodn);
> > > - pamu_init_ppaace(ppaace);
> > > - /* window size is 2^(WSE+1) bytes */
> > > - set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, 35);
> > > - ppaace->wbah = 0;
> > > - set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0);
> > > - set_bf(ppaace->impl_attr, PAACE_IA_ATM,
> > > - PAACE_ATM_NO_XLATE);
> > > - set_bf(ppaace->addr_bitfields, PAACE_AF_AP,
> > > - PAACE_AP_PERMS_ALL);
> > > + setup_default_ppaace(ppaace);
> > >   if (of_device_is_compatible

RE: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices

2013-10-16 Thread Bhushan Bharat-R65777


> -Original Message-
> From: Sethi Varun-B16395
> Sent: Wednesday, October 16, 2013 4:53 PM
> To: j...@8bytes.org; io...@lists.linux-foundation.org; linuxppc-
> d...@lists.ozlabs.org; linux-kernel@vger.kernel.org; Yoder Stuart-B08248; Wood
> Scott-B07421; alex.william...@redhat.com; Bhushan Bharat-R65777
> Cc: Sethi Varun-B16395
> Subject: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices
> 
> Once the PCIe device assigned to a guest VM (via VFIO) gets detached from the
> iommu domain (when guest terminates), its PAMU table entry is disabled. So, 
> this
> would prevent the device from being used once it's assigned back to the host.
> 
> This patch allows for creation of a default DMA window corresponding to the
> device and subsequently enabling the PAMU table entry. Before we enable the
> entry, we ensure that the device's bus master capability is disabled (device
> quiesced).
> 
> Signed-off-by: Varun Sethi 
> ---
>  drivers/iommu/fsl_pamu.c|   43 
>  drivers/iommu/fsl_pamu.h|1 +
>  drivers/iommu/fsl_pamu_domain.c |   46 
> ---
>  3 files changed, 78 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index
> cba0498..fb4a031 100644
> --- a/drivers/iommu/fsl_pamu.c
> +++ b/drivers/iommu/fsl_pamu.c
> @@ -225,6 +225,21 @@ static struct paace *pamu_get_spaace(struct paace *paace,
> u32 wnum)
>   return spaace;
>  }
> 
> +/*
> + * Default PPAACE settings for an LIODN.
> + */
> +static void setup_default_ppaace(struct paace *ppaace) {
> + pamu_init_ppaace(ppaace);
> + /* window size is 2^(WSE+1) bytes */
> + set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, 35);
> + ppaace->wbah = 0;
> + set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0);
> + set_bf(ppaace->impl_attr, PAACE_IA_ATM,
> + PAACE_ATM_NO_XLATE);
> + set_bf(ppaace->addr_bitfields, PAACE_AF_AP,
> + PAACE_AP_PERMS_ALL);
> +}
>  /**
>   * pamu_get_fspi_and_allocate() - Allocates fspi index and reserves 
> subwindows
>   *required for primary PAACE in the secondary
> @@ -253,6 +268,24 @@ static unsigned long pamu_get_fspi_and_allocate(u32
> subwin_cnt)
>   return (spaace_addr - (unsigned long)spaact) / (sizeof(struct paace));  
> }
> 
> +/* Reset the PAACE entry to the default state */ void
> +enable_default_dma_window(int liodn) {
> + struct paace *ppaace;
> +
> + ppaace = pamu_get_ppaace(liodn);
> + if (!ppaace) {
> + pr_debug("Invalid liodn entry\n");
> + return;
> + }
> +
> + memset(ppaace, 0, sizeof(struct paace));
> +
> + setup_default_ppaace(ppaace);
> + mb();
> + pamu_enable_liodn(liodn);
> +}
> +
>  /* Release the subwindows reserved for a particular LIODN */  void
> pamu_free_subwins(int liodn)  { @@ -752,15 +785,7 @@ static void __init
> setup_liodns(void)
>   continue;
>   }
>   ppaace = pamu_get_ppaace(liodn);
> - pamu_init_ppaace(ppaace);
> - /* window size is 2^(WSE+1) bytes */
> - set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, 35);
> - ppaace->wbah = 0;
> - set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0);
> - set_bf(ppaace->impl_attr, PAACE_IA_ATM,
> - PAACE_ATM_NO_XLATE);
> - set_bf(ppaace->addr_bitfields, PAACE_AF_AP,
> - PAACE_AP_PERMS_ALL);
> + setup_default_ppaace(ppaace);
>   if (of_device_is_compatible(node, "fsl,qman-portal"))
>   setup_qbman_paace(ppaace, QMAN_PORTAL_PAACE);
>   if (of_device_is_compatible(node, "fsl,qman")) diff 
> --git
> a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index 8fc1a12..0edc
> 100644
> --- a/drivers/iommu/fsl_pamu.h
> +++ b/drivers/iommu/fsl_pamu.h
> @@ -406,5 +406,6 @@ void get_ome_index(u32 *omi_index, struct device *dev);  
> int
> pamu_update_paace_stash(int liodn, u32 subwin, u32 value);  int
> pamu_disable_spaace(int liodn, u32 subwin);
>  u32 pamu_get_max_subwin_cnt(void);
> +void enable_default_dma_window(int liodn);
> 
>  #endif  /* __FSL_PAMU_H */
> diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
> index 966ae70..dd6cafc 100644
> --- a/drivers/iommu/fsl_pamu_domain.c
> +++ b/drivers/iommu/fsl_pamu_domain

RE: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices

2013-10-16 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Sethi Varun-B16395
 Sent: Wednesday, October 16, 2013 4:53 PM
 To: j...@8bytes.org; io...@lists.linux-foundation.org; linuxppc-
 d...@lists.ozlabs.org; linux-kernel@vger.kernel.org; Yoder Stuart-B08248; Wood
 Scott-B07421; alex.william...@redhat.com; Bhushan Bharat-R65777
 Cc: Sethi Varun-B16395
 Subject: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices
 
 Once the PCIe device assigned to a guest VM (via VFIO) gets detached from the
 iommu domain (when guest terminates), its PAMU table entry is disabled. So, 
 this
 would prevent the device from being used once it's assigned back to the host.
 
 This patch allows for creation of a default DMA window corresponding to the
 device and subsequently enabling the PAMU table entry. Before we enable the
 entry, we ensure that the device's bus master capability is disabled (device
 quiesced).
 
 Signed-off-by: Varun Sethi varun.se...@freescale.com
 ---
  drivers/iommu/fsl_pamu.c|   43 
  drivers/iommu/fsl_pamu.h|1 +
  drivers/iommu/fsl_pamu_domain.c |   46 
 ---
  3 files changed, 78 insertions(+), 12 deletions(-)
 
 diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index
 cba0498..fb4a031 100644
 --- a/drivers/iommu/fsl_pamu.c
 +++ b/drivers/iommu/fsl_pamu.c
 @@ -225,6 +225,21 @@ static struct paace *pamu_get_spaace(struct paace *paace,
 u32 wnum)
   return spaace;
  }
 
 +/*
 + * Default PPAACE settings for an LIODN.
 + */
 +static void setup_default_ppaace(struct paace *ppaace) {
 + pamu_init_ppaace(ppaace);
 + /* window size is 2^(WSE+1) bytes */
 + set_bf(ppaace-addr_bitfields, PPAACE_AF_WSE, 35);
 + ppaace-wbah = 0;
 + set_bf(ppaace-addr_bitfields, PPAACE_AF_WBAL, 0);
 + set_bf(ppaace-impl_attr, PAACE_IA_ATM,
 + PAACE_ATM_NO_XLATE);
 + set_bf(ppaace-addr_bitfields, PAACE_AF_AP,
 + PAACE_AP_PERMS_ALL);
 +}
  /**
   * pamu_get_fspi_and_allocate() - Allocates fspi index and reserves 
 subwindows
   *required for primary PAACE in the secondary
 @@ -253,6 +268,24 @@ static unsigned long pamu_get_fspi_and_allocate(u32
 subwin_cnt)
   return (spaace_addr - (unsigned long)spaact) / (sizeof(struct paace));  
 }
 
 +/* Reset the PAACE entry to the default state */ void
 +enable_default_dma_window(int liodn) {
 + struct paace *ppaace;
 +
 + ppaace = pamu_get_ppaace(liodn);
 + if (!ppaace) {
 + pr_debug("Invalid liodn entry\n");
 + return;
 + }
 +
 + memset(ppaace, 0, sizeof(struct paace));
 +
 + setup_default_ppaace(ppaace);
 + mb();
 + pamu_enable_liodn(liodn);
 +}
 +
  /* Release the subwindows reserved for a particular LIODN */  void
 pamu_free_subwins(int liodn)  { @@ -752,15 +785,7 @@ static void __init
 setup_liodns(void)
   continue;
   }
   ppaace = pamu_get_ppaace(liodn);
 - pamu_init_ppaace(ppaace);
 - /* window size is 2^(WSE+1) bytes */
 - set_bf(ppaace-addr_bitfields, PPAACE_AF_WSE, 35);
 - ppaace-wbah = 0;
 - set_bf(ppaace-addr_bitfields, PPAACE_AF_WBAL, 0);
 - set_bf(ppaace-impl_attr, PAACE_IA_ATM,
 - PAACE_ATM_NO_XLATE);
 - set_bf(ppaace-addr_bitfields, PAACE_AF_AP,
 - PAACE_AP_PERMS_ALL);
 + setup_default_ppaace(ppaace);
   if (of_device_is_compatible(node, "fsl,qman-portal"))
   setup_qbman_paace(ppaace, QMAN_PORTAL_PAACE);
   if (of_device_is_compatible(node, "fsl,qman")) diff 
 --git
 a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index 8fc1a12..0edc
 100644
 --- a/drivers/iommu/fsl_pamu.h
 +++ b/drivers/iommu/fsl_pamu.h
 @@ -406,5 +406,6 @@ void get_ome_index(u32 *omi_index, struct device *dev);  
 int
 pamu_update_paace_stash(int liodn, u32 subwin, u32 value);  int
 pamu_disable_spaace(int liodn, u32 subwin);
  u32 pamu_get_max_subwin_cnt(void);
 +void enable_default_dma_window(int liodn);
 
  #endif  /* __FSL_PAMU_H */
 diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
 index 966ae70..dd6cafc 100644
 --- a/drivers/iommu/fsl_pamu_domain.c
 +++ b/drivers/iommu/fsl_pamu_domain.c
 @@ -340,17 +340,57 @@ static inline struct device_domain_info
 *find_domain(struct device *dev)
   return dev->archdata.iommu_domain;
  }
 
 +/* Disable device DMA capability and enable default DMA window */
 +static void disable_device_dma(struct device_domain_info *info,
 + int enable_dma_window)
 +{
 +#ifdef CONFIG_PCI
 + if (info->dev->bus == &pci_bus_type) {
 + struct pci_dev *pdev = NULL;
 + pdev = to_pci_dev(info->dev

RE: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices

2013-10-16 Thread Bhushan Bharat-R65777


 
 
   -Original Message-
   From: Sethi Varun-B16395
   Sent: Wednesday, October 16, 2013 4:53 PM
   To: j...@8bytes.org; io...@lists.linux-foundation.org; linuxppc-
   d...@lists.ozlabs.org; linux-kernel@vger.kernel.org; Yoder
   Stuart-B08248; Wood Scott-B07421; alex.william...@redhat.com;
   Bhushan
   Bharat-R65777
   Cc: Sethi Varun-B16395
   Subject: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for
   PCIe devices
  
   Once the PCIe device assigned to a guest VM (via VFIO) gets detached
   from the iommu domain (when guest terminates), its PAMU table entry
   is disabled. So, this would prevent the device from being used once
   it's
  assigned back to the host.
  
   This patch allows for creation of a default DMA window corresponding
   to the device and subsequently enabling the PAMU table entry. Before
   we enable the entry, we ensure that the device's bus master
   capability is disabled (device quiesced).
  
   Signed-off-by: Varun Sethi varun.se...@freescale.com
   ---
drivers/iommu/fsl_pamu.c|   43 ---
  -
drivers/iommu/fsl_pamu.h|1 +
drivers/iommu/fsl_pamu_domain.c |   46
  ---
3 files changed, 78 insertions(+), 12 deletions(-)
  
   diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
   index
   cba0498..fb4a031 100644
   --- a/drivers/iommu/fsl_pamu.c
   +++ b/drivers/iommu/fsl_pamu.c
   @@ -225,6 +225,21 @@ static struct paace *pamu_get_spaace(struct
   paace *paace,
   u32 wnum)
 return spaace;
}
  
   +/*
   + * Defaul PPAACE settings for an LIODN.
   + */
   +static void setup_default_ppaace(struct paace *ppaace) {
   + pamu_init_ppaace(ppaace);
   + /* window size is 2^(WSE+1) bytes */
   + set_bf(ppaace-addr_bitfields, PPAACE_AF_WSE, 35);
   + ppaace-wbah = 0;
   + set_bf(ppaace-addr_bitfields, PPAACE_AF_WBAL, 0);
   + set_bf(ppaace-impl_attr, PAACE_IA_ATM,
   + PAACE_ATM_NO_XLATE);
   + set_bf(ppaace-addr_bitfields, PAACE_AF_AP,
   + PAACE_AP_PERMS_ALL);
   +}
/**
 * pamu_get_fspi_and_allocate() - Allocates fspi index and reserves
  subwindows
 *required for primary PAACE in the
  secondary
   @@ -253,6 +268,24 @@ static unsigned long
   pamu_get_fspi_and_allocate(u32
   subwin_cnt)
 return (spaace_addr - (unsigned long)spaact) / (sizeof(struct
   paace));  }
  
   +/* Reset the PAACE entry to the default state */ void
   +enable_default_dma_window(int liodn) {
   + struct paace *ppaace;
   +
   + ppaace = pamu_get_ppaace(liodn);
   + if (!ppaace) {
   + pr_debug(Invalid liodn entry\n);
   + return;
   + }
   +
   + memset(ppaace, 0, sizeof(struct paace));
   +
   + setup_default_ppaace(ppaace);
   + mb();
   + pamu_enable_liodn(liodn);
   +}
   +
/* Release the subwindows reserved for a particular LIODN */  void
   pamu_free_subwins(int liodn)  { @@ -752,15 +785,7 @@ static void
   __init
   setup_liodns(void)
 continue;
 }
 ppaace = pamu_get_ppaace(liodn);
   - pamu_init_ppaace(ppaace);
   - /* window size is 2^(WSE+1) bytes */
   - set_bf(ppaace-addr_bitfields, PPAACE_AF_WSE, 35);
   - ppaace-wbah = 0;
   - set_bf(ppaace-addr_bitfields, PPAACE_AF_WBAL, 0);
   - set_bf(ppaace-impl_attr, PAACE_IA_ATM,
   - PAACE_ATM_NO_XLATE);
   - set_bf(ppaace-addr_bitfields, PAACE_AF_AP,
   - PAACE_AP_PERMS_ALL);
   + setup_default_ppaace(ppaace);
 if (of_device_is_compatible(node, fsl,qman-portal))
 setup_qbman_paace(ppaace, QMAN_PORTAL_PAACE);
 if (of_device_is_compatible(node, fsl,qman)) diff --
  git
   a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index
   8fc1a12..0edc
   100644
   --- a/drivers/iommu/fsl_pamu.h
   +++ b/drivers/iommu/fsl_pamu.h
   @@ -406,5 +406,6 @@ void get_ome_index(u32 *omi_index, struct device
   *dev);  int pamu_update_paace_stash(int liodn, u32 subwin, u32
   value); int pamu_disable_spaace(int liodn, u32 subwin);
u32 pamu_get_max_subwin_cnt(void);
   +void enable_default_dma_window(int liodn);
  
#endif  /* __FSL_PAMU_H */
   diff --git a/drivers/iommu/fsl_pamu_domain.c
   b/drivers/iommu/fsl_pamu_domain.c index 966ae70..dd6cafc 100644
   --- a/drivers/iommu/fsl_pamu_domain.c
   +++ b/drivers/iommu/fsl_pamu_domain.c
   @@ -340,17 +340,57 @@ static inline struct device_domain_info
   *find_domain(struct device *dev)
 return dev-archdata.iommu_domain;  }
  
   +/* Disable device DMA capability and enable default DMA window */
   +static void disable_device_dma(struct device_domain_info *info,
   + int enable_dma_window)
   +{
   +#ifdef CONFIG_PCI
   + if (info-dev-bus == pci_bus_type

RE: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices

2013-10-16 Thread Bhushan Bharat-R65777

 
   
   
 -Original Message-
 From: Sethi Varun-B16395
 Sent: Wednesday, October 16, 2013 4:53 PM
 To: j...@8bytes.org; io...@lists.linux-foundation.org; linuxppc-
 d...@lists.ozlabs.org; linux-kernel@vger.kernel.org; Yoder
 Stuart-B08248; Wood Scott-B07421; alex.william...@redhat.com;
 Bhushan
 Bharat-R65777
 Cc: Sethi Varun-B16395
 Subject: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for
 PCIe devices

 Once the PCIe device assigned to a guest VM (via VFIO) gets
 detached from the iommu domain (when guest terminates), its PAMU
 table entry is disabled. So, this would prevent the device from
 being used once it's
assigned back to the host.

 This patch allows for creation of a default DMA window
 corresponding to the device and subsequently enabling the PAMU
 table entry. Before we enable the entry, we ensure that the
 device's bus master capability is disabled (device quiesced).

 Signed-off-by: Varun Sethi varun.se...@freescale.com
 ---
  drivers/iommu/fsl_pamu.c|   43
  ---
-
  drivers/iommu/fsl_pamu.h|1 +
  drivers/iommu/fsl_pamu_domain.c |   46
---
  3 files changed, 78 insertions(+), 12 deletions(-)

 diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
 index
 cba0498..fb4a031 100644
 --- a/drivers/iommu/fsl_pamu.c
 +++ b/drivers/iommu/fsl_pamu.c
 @@ -225,6 +225,21 @@ static struct paace *pamu_get_spaace(struct
 paace *paace,
 u32 wnum)
   return spaace;
  }

 +/*
 + * Defaul PPAACE settings for an LIODN.
 + */
 +static void setup_default_ppaace(struct paace *ppaace) {
 + pamu_init_ppaace(ppaace);
 + /* window size is 2^(WSE+1) bytes */
 + set_bf(ppaace-addr_bitfields, PPAACE_AF_WSE, 35);
 + ppaace-wbah = 0;
 + set_bf(ppaace-addr_bitfields, PPAACE_AF_WBAL, 0);
 + set_bf(ppaace-impl_attr, PAACE_IA_ATM,
 + PAACE_ATM_NO_XLATE);
 + set_bf(ppaace-addr_bitfields, PAACE_AF_AP,
 + PAACE_AP_PERMS_ALL);
 +}
  /**
   * pamu_get_fspi_and_allocate() - Allocates fspi index and
 reserves
subwindows
   *required for primary PAACE in
  the
secondary
 @@ -253,6 +268,24 @@ static unsigned long
 pamu_get_fspi_and_allocate(u32
 subwin_cnt)
   return (spaace_addr - (unsigned long)spaact) / (sizeof(struct
 paace));  }

 +/* Reset the PAACE entry to the default state */ void
 +enable_default_dma_window(int liodn) {
 + struct paace *ppaace;
 +
 + ppaace = pamu_get_ppaace(liodn);
 + if (!ppaace) {
 + pr_debug(Invalid liodn entry\n);
 + return;
 + }
 +
 + memset(ppaace, 0, sizeof(struct paace));
 +
 + setup_default_ppaace(ppaace);
 + mb();
 + pamu_enable_liodn(liodn);
 +}
 +
  /* Release the subwindows reserved for a particular LIODN */
 void pamu_free_subwins(int liodn)  { @@ -752,15 +785,7 @@ static
 void __init
 setup_liodns(void)
   continue;
   }
   ppaace = pamu_get_ppaace(liodn);
 - pamu_init_ppaace(ppaace);
 - /* window size is 2^(WSE+1) bytes */
 - set_bf(ppaace-addr_bitfields, PPAACE_AF_WSE,
  35);
 - ppaace-wbah = 0;
 - set_bf(ppaace-addr_bitfields, PPAACE_AF_WBAL,
  0);
 - set_bf(ppaace-impl_attr, PAACE_IA_ATM,
 - PAACE_ATM_NO_XLATE);
 - set_bf(ppaace-addr_bitfields, PAACE_AF_AP,
 - PAACE_AP_PERMS_ALL);
 + setup_default_ppaace(ppaace);
   if (of_device_is_compatible(node, fsl,qman-
  portal))
   setup_qbman_paace(ppaace,
  QMAN_PORTAL_PAACE);
   if (of_device_is_compatible(node, fsl,qman))
  diff --
git
 a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index
 8fc1a12..0edc
 100644
 --- a/drivers/iommu/fsl_pamu.h
 +++ b/drivers/iommu/fsl_pamu.h
 @@ -406,5 +406,6 @@ void get_ome_index(u32 *omi_index, struct
 device *dev);  int pamu_update_paace_stash(int liodn, u32
 subwin,
 u32 value); int pamu_disable_spaace(int liodn, u32 subwin);
  u32 pamu_get_max_subwin_cnt(void);
 +void enable_default_dma_window(int liodn);

  #endif  /* __FSL_PAMU_H */
 diff --git a/drivers/iommu/fsl_pamu_domain.c
 b/drivers/iommu/fsl_pamu_domain.c index 966ae70..dd6cafc 100644
 --- a/drivers/iommu/fsl_pamu_domain.c
 +++ b/drivers/iommu

RE: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices

2013-10-16 Thread Bhushan Bharat-R65777


 
 
   -Original Message-
   From: Sethi Varun-B16395
   Sent: Wednesday, October 16, 2013 4:53 PM
   To: j...@8bytes.org; iommu@lists.linux-foundation.org; linuxppc-
   d...@lists.ozlabs.org; linux-ker...@vger.kernel.org; Yoder
   Stuart-B08248; Wood Scott-B07421; alex.william...@redhat.com;
   Bhushan
   Bharat-R65777
   Cc: Sethi Varun-B16395
   Subject: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for
   PCIe devices
  
   Once the PCIe device assigned to a guest VM (via VFIO) gets detached
   from the iommu domain (when guest terminates), its PAMU table entry
   is disabled. So, this would prevent the device from being used once
   it's
  assigned back to the host.
  
   This patch allows for creation of a default DMA window corresponding
   to the device and subsequently enabling the PAMU table entry. Before
   we enable the entry, we ensure that the device's bus master
   capability is disabled (device quiesced).
  
   Signed-off-by: Varun Sethi varun.se...@freescale.com
   ---
drivers/iommu/fsl_pamu.c|   43 ---
  -
drivers/iommu/fsl_pamu.h|1 +
drivers/iommu/fsl_pamu_domain.c |   46
  ---
3 files changed, 78 insertions(+), 12 deletions(-)
  
   diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
   index
   cba0498..fb4a031 100644
   --- a/drivers/iommu/fsl_pamu.c
   +++ b/drivers/iommu/fsl_pamu.c
   @@ -225,6 +225,21 @@ static struct paace *pamu_get_spaace(struct
   paace *paace,
   u32 wnum)
 return spaace;
}
  
   +/*
   + * Defaul PPAACE settings for an LIODN.
   + */
   +static void setup_default_ppaace(struct paace *ppaace) {
   + pamu_init_ppaace(ppaace);
   + /* window size is 2^(WSE+1) bytes */
   + set_bf(ppaace-addr_bitfields, PPAACE_AF_WSE, 35);
   + ppaace-wbah = 0;
   + set_bf(ppaace-addr_bitfields, PPAACE_AF_WBAL, 0);
   + set_bf(ppaace-impl_attr, PAACE_IA_ATM,
   + PAACE_ATM_NO_XLATE);
   + set_bf(ppaace-addr_bitfields, PAACE_AF_AP,
   + PAACE_AP_PERMS_ALL);
   +}
/**
 * pamu_get_fspi_and_allocate() - Allocates fspi index and reserves
  subwindows
 *required for primary PAACE in the
  secondary
   @@ -253,6 +268,24 @@ static unsigned long
   pamu_get_fspi_and_allocate(u32
   subwin_cnt)
 return (spaace_addr - (unsigned long)spaact) / (sizeof(struct
   paace));  }
  
   +/* Reset the PAACE entry to the default state */ void
   +enable_default_dma_window(int liodn) {
   + struct paace *ppaace;
   +
   + ppaace = pamu_get_ppaace(liodn);
   + if (!ppaace) {
   + pr_debug(Invalid liodn entry\n);
   + return;
   + }
   +
   + memset(ppaace, 0, sizeof(struct paace));
   +
   + setup_default_ppaace(ppaace);
   + mb();
   + pamu_enable_liodn(liodn);
   +}
   +
/* Release the subwindows reserved for a particular LIODN */  void
   pamu_free_subwins(int liodn)  { @@ -752,15 +785,7 @@ static void
   __init
   setup_liodns(void)
 continue;
 }
 ppaace = pamu_get_ppaace(liodn);
   - pamu_init_ppaace(ppaace);
   - /* window size is 2^(WSE+1) bytes */
   - set_bf(ppaace-addr_bitfields, PPAACE_AF_WSE, 35);
   - ppaace-wbah = 0;
   - set_bf(ppaace-addr_bitfields, PPAACE_AF_WBAL, 0);
   - set_bf(ppaace-impl_attr, PAACE_IA_ATM,
   - PAACE_ATM_NO_XLATE);
   - set_bf(ppaace-addr_bitfields, PAACE_AF_AP,
   - PAACE_AP_PERMS_ALL);
   + setup_default_ppaace(ppaace);
 if (of_device_is_compatible(node, fsl,qman-portal))
 setup_qbman_paace(ppaace, QMAN_PORTAL_PAACE);
 if (of_device_is_compatible(node, fsl,qman)) diff --
  git
   a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index
   8fc1a12..0edc
   100644
   --- a/drivers/iommu/fsl_pamu.h
   +++ b/drivers/iommu/fsl_pamu.h
   @@ -406,5 +406,6 @@ void get_ome_index(u32 *omi_index, struct device
   *dev);  int pamu_update_paace_stash(int liodn, u32 subwin, u32
   value); int pamu_disable_spaace(int liodn, u32 subwin);
u32 pamu_get_max_subwin_cnt(void);
   +void enable_default_dma_window(int liodn);
  
#endif  /* __FSL_PAMU_H */
   diff --git a/drivers/iommu/fsl_pamu_domain.c
   b/drivers/iommu/fsl_pamu_domain.c index 966ae70..dd6cafc 100644
   --- a/drivers/iommu/fsl_pamu_domain.c
   +++ b/drivers/iommu/fsl_pamu_domain.c
   @@ -340,17 +340,57 @@ static inline struct device_domain_info
   *find_domain(struct device *dev)
 return dev-archdata.iommu_domain;  }
  
   +/* Disable device DMA capability and enable default DMA window */
   +static void disable_device_dma(struct device_domain_info *info,
   + int enable_dma_window)
   +{
   +#ifdef CONFIG_PCI
   + if (info-dev-bus == pci_bus_type

RE: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices

2013-10-16 Thread Bhushan Bharat-R65777

 
   
   
 -Original Message-
 From: Sethi Varun-B16395
 Sent: Wednesday, October 16, 2013 4:53 PM
 To: j...@8bytes.org; iommu@lists.linux-foundation.org; linuxppc-
 d...@lists.ozlabs.org; linux-ker...@vger.kernel.org; Yoder
 Stuart-B08248; Wood Scott-B07421; alex.william...@redhat.com;
 Bhushan
 Bharat-R65777
 Cc: Sethi Varun-B16395
 Subject: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for
 PCIe devices

 Once the PCIe device assigned to a guest VM (via VFIO) gets
 detached from the iommu domain (when guest terminates), its PAMU
 table entry is disabled. So, this would prevent the device from
 being used once it's
assigned back to the host.

 This patch allows for creation of a default DMA window
 corresponding to the device and subsequently enabling the PAMU
 table entry. Before we enable the entry, we ensure that the
 device's bus master capability is disabled (device quiesced).

 Signed-off-by: Varun Sethi varun.se...@freescale.com
 ---
  drivers/iommu/fsl_pamu.c|   43
  ---
-
  drivers/iommu/fsl_pamu.h|1 +
  drivers/iommu/fsl_pamu_domain.c |   46
---
  3 files changed, 78 insertions(+), 12 deletions(-)

 diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
 index
 cba0498..fb4a031 100644
 --- a/drivers/iommu/fsl_pamu.c
 +++ b/drivers/iommu/fsl_pamu.c
 @@ -225,6 +225,21 @@ static struct paace *pamu_get_spaace(struct
 paace *paace,
 u32 wnum)
   return spaace;
  }

 +/*
 + * Defaul PPAACE settings for an LIODN.
 + */
 +static void setup_default_ppaace(struct paace *ppaace) {
 + pamu_init_ppaace(ppaace);
 + /* window size is 2^(WSE+1) bytes */
 + set_bf(ppaace-addr_bitfields, PPAACE_AF_WSE, 35);
 + ppaace-wbah = 0;
 + set_bf(ppaace-addr_bitfields, PPAACE_AF_WBAL, 0);
 + set_bf(ppaace-impl_attr, PAACE_IA_ATM,
 + PAACE_ATM_NO_XLATE);
 + set_bf(ppaace-addr_bitfields, PAACE_AF_AP,
 + PAACE_AP_PERMS_ALL);
 +}
  /**
   * pamu_get_fspi_and_allocate() - Allocates fspi index and
 reserves
subwindows
   *required for primary PAACE in
  the
secondary
 @@ -253,6 +268,24 @@ static unsigned long
 pamu_get_fspi_and_allocate(u32
 subwin_cnt)
   return (spaace_addr - (unsigned long)spaact) / (sizeof(struct
 paace));  }

 +/* Reset the PAACE entry to the default state */ void
 +enable_default_dma_window(int liodn) {
 + struct paace *ppaace;
 +
 + ppaace = pamu_get_ppaace(liodn);
 + if (!ppaace) {
 + pr_debug(Invalid liodn entry\n);
 + return;
 + }
 +
 + memset(ppaace, 0, sizeof(struct paace));
 +
 + setup_default_ppaace(ppaace);
 + mb();
 + pamu_enable_liodn(liodn);
 +}
 +
  /* Release the subwindows reserved for a particular LIODN */
 void pamu_free_subwins(int liodn)  { @@ -752,15 +785,7 @@ static
 void __init
 setup_liodns(void)
   continue;
   }
   ppaace = pamu_get_ppaace(liodn);
 - pamu_init_ppaace(ppaace);
 - /* window size is 2^(WSE+1) bytes */
 - set_bf(ppaace-addr_bitfields, PPAACE_AF_WSE,
  35);
 - ppaace-wbah = 0;
 - set_bf(ppaace-addr_bitfields, PPAACE_AF_WBAL,
  0);
 - set_bf(ppaace-impl_attr, PAACE_IA_ATM,
 - PAACE_ATM_NO_XLATE);
 - set_bf(ppaace-addr_bitfields, PAACE_AF_AP,
 - PAACE_AP_PERMS_ALL);
 + setup_default_ppaace(ppaace);
   if (of_device_is_compatible(node, fsl,qman-
  portal))
   setup_qbman_paace(ppaace,
  QMAN_PORTAL_PAACE);
   if (of_device_is_compatible(node, fsl,qman))
  diff --
git
 a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index
 8fc1a12..0edc
 100644
 --- a/drivers/iommu/fsl_pamu.h
 +++ b/drivers/iommu/fsl_pamu.h
 @@ -406,5 +406,6 @@ void get_ome_index(u32 *omi_index, struct
 device *dev);  int pamu_update_paace_stash(int liodn, u32
 subwin,
 u32 value); int pamu_disable_spaace(int liodn, u32 subwin);
  u32 pamu_get_max_subwin_cnt(void);
 +void enable_default_dma_window(int liodn);

  #endif  /* __FSL_PAMU_H */
 diff --git a/drivers/iommu/fsl_pamu_domain.c
 b/drivers/iommu/fsl_pamu_domain.c index 966ae70..dd6cafc 100644
 --- a/drivers/iommu/fsl_pamu_domain.c
 +++ b/drivers/iommu

RE: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices

2013-10-16 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Sethi Varun-B16395
 Sent: Wednesday, October 16, 2013 4:53 PM
 To: j...@8bytes.org; io...@lists.linux-foundation.org; linuxppc-
 d...@lists.ozlabs.org; linux-ker...@vger.kernel.org; Yoder Stuart-B08248; Wood
 Scott-B07421; alex.william...@redhat.com; Bhushan Bharat-R65777
 Cc: Sethi Varun-B16395
 Subject: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices
 
 Once the PCIe device assigned to a guest VM (via VFIO) gets detached from the
 iommu domain (when guest terminates), its PAMU table entry is disabled. So, 
 this
 would prevent the device from being used once it's assigned back to the host.
 
 This patch allows for creation of a default DMA window corresponding to the
 device and subsequently enabling the PAMU table entry. Before we enable the
 entry, we ensure that the device's bus master capability is disabled (device
 quiesced).
 
 Signed-off-by: Varun Sethi varun.se...@freescale.com
 ---
  drivers/iommu/fsl_pamu.c|   43 
  drivers/iommu/fsl_pamu.h|1 +
  drivers/iommu/fsl_pamu_domain.c |   46 
 ---
  3 files changed, 78 insertions(+), 12 deletions(-)
 
 diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index
 cba0498..fb4a031 100644
 --- a/drivers/iommu/fsl_pamu.c
 +++ b/drivers/iommu/fsl_pamu.c
 @@ -225,6 +225,21 @@ static struct paace *pamu_get_spaace(struct paace *paace,
 u32 wnum)
   return spaace;
  }
 
 +/*
 + * Defaul PPAACE settings for an LIODN.
 + */
 +static void setup_default_ppaace(struct paace *ppaace) {
 + pamu_init_ppaace(ppaace);
 + /* window size is 2^(WSE+1) bytes */
 + set_bf(ppaace-addr_bitfields, PPAACE_AF_WSE, 35);
 + ppaace-wbah = 0;
 + set_bf(ppaace-addr_bitfields, PPAACE_AF_WBAL, 0);
 + set_bf(ppaace-impl_attr, PAACE_IA_ATM,
 + PAACE_ATM_NO_XLATE);
 + set_bf(ppaace-addr_bitfields, PAACE_AF_AP,
 + PAACE_AP_PERMS_ALL);
 +}
  /**
   * pamu_get_fspi_and_allocate() - Allocates fspi index and reserves 
 subwindows
   *required for primary PAACE in the secondary
 @@ -253,6 +268,24 @@ static unsigned long pamu_get_fspi_and_allocate(u32
 subwin_cnt)
   return (spaace_addr - (unsigned long)spaact) / (sizeof(struct paace));  
 }
 
 +/* Reset the PAACE entry to the default state */ void
 +enable_default_dma_window(int liodn) {
 + struct paace *ppaace;
 +
 + ppaace = pamu_get_ppaace(liodn);
 + if (!ppaace) {
 + pr_debug(Invalid liodn entry\n);
 + return;
 + }
 +
 + memset(ppaace, 0, sizeof(struct paace));
 +
 + setup_default_ppaace(ppaace);
 + mb();
 + pamu_enable_liodn(liodn);
 +}
 +
  /* Release the subwindows reserved for a particular LIODN */  void
 pamu_free_subwins(int liodn)  { @@ -752,15 +785,7 @@ static void __init
 setup_liodns(void)
   continue;
   }
   ppaace = pamu_get_ppaace(liodn);
 - pamu_init_ppaace(ppaace);
 - /* window size is 2^(WSE+1) bytes */
 - set_bf(ppaace-addr_bitfields, PPAACE_AF_WSE, 35);
 - ppaace-wbah = 0;
 - set_bf(ppaace-addr_bitfields, PPAACE_AF_WBAL, 0);
 - set_bf(ppaace-impl_attr, PAACE_IA_ATM,
 - PAACE_ATM_NO_XLATE);
 - set_bf(ppaace-addr_bitfields, PAACE_AF_AP,
 - PAACE_AP_PERMS_ALL);
 + setup_default_ppaace(ppaace);
   if (of_device_is_compatible(node, fsl,qman-portal))
   setup_qbman_paace(ppaace, QMAN_PORTAL_PAACE);
   if (of_device_is_compatible(node, fsl,qman)) diff 
 --git
 a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index 8fc1a12..0edc
 100644
 --- a/drivers/iommu/fsl_pamu.h
 +++ b/drivers/iommu/fsl_pamu.h
 @@ -406,5 +406,6 @@ void get_ome_index(u32 *omi_index, struct device *dev);  
 int
 pamu_update_paace_stash(int liodn, u32 subwin, u32 value);  int
 pamu_disable_spaace(int liodn, u32 subwin);
  u32 pamu_get_max_subwin_cnt(void);
 +void enable_default_dma_window(int liodn);
 
  #endif  /* __FSL_PAMU_H */
 diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
 index 966ae70..dd6cafc 100644
 --- a/drivers/iommu/fsl_pamu_domain.c
 +++ b/drivers/iommu/fsl_pamu_domain.c
 @@ -340,17 +340,57 @@ static inline struct device_domain_info
 *find_domain(struct device *dev)
   return dev-archdata.iommu_domain;
  }
 
 +/* Disable device DMA capability and enable default DMA window */
 +static void disable_device_dma(struct device_domain_info *info,
 + int enable_dma_window)
 +{
 +#ifdef CONFIG_PCI
 + if (info-dev-bus == pci_bus_type) {
 + struct pci_dev *pdev = NULL;
 + pdev = to_pci_dev(info-dev

RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state and altivec idle

2013-10-16 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wang Dongsheng-B40534
 Sent: Tuesday, October 15, 2013 2:51 PM
 To: Wood Scott-B07421
 Cc: Bhushan Bharat-R65777; linuxppc-dev@lists.ozlabs.org; Wang 
 Dongsheng-B40534
 Subject: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state and altivec 
 idle
 
 From: Wang Dongsheng dongsheng.w...@freescale.com
 
 Add a sysfs interface to enable/disable pw20 state or altivec idle, and control 
 the
 wait entry time.
 
 Enable/Disable interface:
 0, disable. 1, enable.
 /sys/devices/system/cpu/cpuX/pw20_state
 /sys/devices/system/cpu/cpuX/altivec_idle
 
 Set wait time interface:(Nanosecond)
 /sys/devices/system/cpu/cpuX/pw20_wait_time
 /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
 Example: Based on a TB frequency of 41 MHz.
 1~48(ns): TB[63]
 49~97(ns): TB[62]
 98~195(ns): TB[61]
 196~390(ns): TB[60]
 391~780(ns): TB[59]
 781~1560(ns): TB[58]
 ...
 
 Signed-off-by: Wang Dongsheng dongsheng.w...@freescale.com
 ---
 *v5:
 Change get_idle_ticks_bit function implementation.
 
 *v4:
 Move code from 85xx/common.c to kernel/sysfs.c.
 
 Remove has_pw20_altivec_idle function.
 
 Change wait entry_bit to wait time.
 
 diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index
 27a90b9..10d1128 100644
 --- a/arch/powerpc/kernel/sysfs.c
 +++ b/arch/powerpc/kernel/sysfs.c
 @@ -85,6 +85,284 @@ __setup("smt-snooze-delay=", setup_smt_snooze_delay);
 
  #endif /* CONFIG_PPC64 */
 
 +#ifdef CONFIG_FSL_SOC
 +#define MAX_BIT  63
 +
 +static u64 pw20_wt;
 +static u64 altivec_idle_wt;
 +
 +static unsigned int get_idle_ticks_bit(u64 ns) {
 + u64 cycle;
 +
 + if (ns = 1)
 + cycle = div_u64(ns + 500, 1000) * tb_ticks_per_usec;
 + else
 + cycle = div_u64(ns * tb_ticks_per_usec, 1000);
 +
 + if (!cycle)
 + return 0;
 +
 + return ilog2(cycle);
 +}
 +
 +static void do_show_pwrmgtcr0(void *val) {
 + u32 *value = val;
 +
 + *value = mfspr(SPRN_PWRMGTCR0);
 +}
 +
 +static ssize_t show_pw20_state(struct device *dev,
 + struct device_attribute *attr, char *buf) {
 + u32 value;
 + unsigned int cpu = dev->id;
 +
 + smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
 +
 + value &= PWRMGTCR0_PW20_WAIT;
 +
 + return sprintf(buf, "%u\n", value ? 1 : 0); }
 +
 +static void do_store_pw20_state(void *val) {
 + u32 *value = val;
 + u32 pw20_state;
 +
 + pw20_state = mfspr(SPRN_PWRMGTCR0);
 +
 + if (*value)
 + pw20_state |= PWRMGTCR0_PW20_WAIT;
 + else
 + pw20_state &= ~PWRMGTCR0_PW20_WAIT;
 +
 + mtspr(SPRN_PWRMGTCR0, pw20_state);
 +}
 +
 +static ssize_t store_pw20_state(struct device *dev,
 + struct device_attribute *attr,
 + const char *buf, size_t count)
 +{
 + u32 value;
 + unsigned int cpu = dev->id;
 +
 + if (kstrtou32(buf, 0, &value))
 + return -EINVAL;
 +
 + if (value > 1)
 + return -EINVAL;
 +
 + smp_call_function_single(cpu, do_store_pw20_state, &value, 1);
 +
 + return count;
 +}
 +
 +static ssize_t show_pw20_wait_time(struct device *dev,
 + struct device_attribute *attr, char *buf) {
 + u32 value;
 + u64 tb_cycle;
 + s64 time;
 +
 + unsigned int cpu = dev->id;
 +
 + if (!pw20_wt) {
 + smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
 + value = (value & PWRMGTCR0_PW20_ENT) >>
 + PWRMGTCR0_PW20_ENT_SHIFT;
 +
 + tb_cycle = (1 << (MAX_BIT - value)) * 2;

Are value = 0 and value = 1 legal? These will make tb_cycle = 0,

 + time = div_u64(tb_cycle * 1000, tb_ticks_per_usec) - 1;

And time = -1;


 + } else {
 + time = pw20_wt;
 + }
 +
 + return sprintf(buf, "%llu\n", time > 0 ? time : 0);
 }
 +
 +static void set_pw20_wait_entry_bit(void *val) {
 + u32 *value = val;
 + u32 pw20_idle;
 +
 + pw20_idle = mfspr(SPRN_PWRMGTCR0);
 +
 + /* Set Automatic PW20 Core Idle Count */
 + /* clear count */
 + pw20_idle &= ~PWRMGTCR0_PW20_ENT;
 +
 + /* set count */
 + pw20_idle |= ((MAX_BIT - *value) << PWRMGTCR0_PW20_ENT_SHIFT);
 +
 + mtspr(SPRN_PWRMGTCR0, pw20_idle);
 +}
 +
 +static ssize_t store_pw20_wait_time(struct device *dev,
 + struct device_attribute *attr,
 + const char *buf, size_t count)
 +{
 + u32 entry_bit;
 + u64 value;
 +
 + unsigned int cpu = dev->id;
 +
 + if (kstrtou64(buf, 0, &value))
 + return -EINVAL;
 +
 + if (!value)
 + return -EINVAL;
 +
 + entry_bit = get_idle_ticks_bit(value);
 + if (entry_bit > MAX_BIT)
 + return -EINVAL;
 +
 + pw20_wt = value;
 + smp_call_function_single(cpu, set_pw20_wait_entry_bit,
 + &entry_bit, 1);
 +
 + return count;
 +}
 +
 +static ssize_t

RE: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices

2013-10-16 Thread Bhushan Bharat-R65777


 
 
   -Original Message-
   From: Sethi Varun-B16395
   Sent: Wednesday, October 16, 2013 4:53 PM
   To: j...@8bytes.org; io...@lists.linux-foundation.org; linuxppc-
   d...@lists.ozlabs.org; linux-ker...@vger.kernel.org; Yoder
   Stuart-B08248; Wood Scott-B07421; alex.william...@redhat.com;
   Bhushan
   Bharat-R65777
   Cc: Sethi Varun-B16395
   Subject: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for
   PCIe devices
  
   Once the PCIe device assigned to a guest VM (via VFIO) gets detached
   from the iommu domain (when guest terminates), its PAMU table entry
   is disabled. So, this would prevent the device from being used once
   it's
  assigned back to the host.
  
   This patch allows for creation of a default DMA window corresponding
   to the device and subsequently enabling the PAMU table entry. Before
   we enable the entry, we ensure that the device's bus master
   capability is disabled (device quiesced).
  
   Signed-off-by: Varun Sethi varun.se...@freescale.com
   ---
drivers/iommu/fsl_pamu.c|   43 ---
  -
drivers/iommu/fsl_pamu.h|1 +
drivers/iommu/fsl_pamu_domain.c |   46
  ---
3 files changed, 78 insertions(+), 12 deletions(-)
  
   diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
   index
   cba0498..fb4a031 100644
   --- a/drivers/iommu/fsl_pamu.c
   +++ b/drivers/iommu/fsl_pamu.c
   @@ -225,6 +225,21 @@ static struct paace *pamu_get_spaace(struct
   paace *paace,
   u32 wnum)
 return spaace;
}
  
   +/*
   + * Defaul PPAACE settings for an LIODN.
   + */
   +static void setup_default_ppaace(struct paace *ppaace) {
   + pamu_init_ppaace(ppaace);
   + /* window size is 2^(WSE+1) bytes */
   + set_bf(ppaace-addr_bitfields, PPAACE_AF_WSE, 35);
   + ppaace-wbah = 0;
   + set_bf(ppaace-addr_bitfields, PPAACE_AF_WBAL, 0);
   + set_bf(ppaace-impl_attr, PAACE_IA_ATM,
   + PAACE_ATM_NO_XLATE);
   + set_bf(ppaace-addr_bitfields, PAACE_AF_AP,
   + PAACE_AP_PERMS_ALL);
   +}
/**
 * pamu_get_fspi_and_allocate() - Allocates fspi index and reserves
  subwindows
 *required for primary PAACE in the
  secondary
   @@ -253,6 +268,24 @@ static unsigned long
   pamu_get_fspi_and_allocate(u32
   subwin_cnt)
 return (spaace_addr - (unsigned long)spaact) / (sizeof(struct
   paace));  }
  
   +/* Reset the PAACE entry to the default state */ void
   +enable_default_dma_window(int liodn) {
   + struct paace *ppaace;
   +
   + ppaace = pamu_get_ppaace(liodn);
   + if (!ppaace) {
   + pr_debug(Invalid liodn entry\n);
   + return;
   + }
   +
   + memset(ppaace, 0, sizeof(struct paace));
   +
   + setup_default_ppaace(ppaace);
   + mb();
   + pamu_enable_liodn(liodn);
   +}
   +
/* Release the subwindows reserved for a particular LIODN */  void
   pamu_free_subwins(int liodn)  { @@ -752,15 +785,7 @@ static void
   __init
   setup_liodns(void)
 continue;
 }
 ppaace = pamu_get_ppaace(liodn);
   - pamu_init_ppaace(ppaace);
   - /* window size is 2^(WSE+1) bytes */
   - set_bf(ppaace-addr_bitfields, PPAACE_AF_WSE, 35);
   - ppaace-wbah = 0;
   - set_bf(ppaace-addr_bitfields, PPAACE_AF_WBAL, 0);
   - set_bf(ppaace-impl_attr, PAACE_IA_ATM,
   - PAACE_ATM_NO_XLATE);
   - set_bf(ppaace-addr_bitfields, PAACE_AF_AP,
   - PAACE_AP_PERMS_ALL);
   + setup_default_ppaace(ppaace);
 if (of_device_is_compatible(node, fsl,qman-portal))
 setup_qbman_paace(ppaace, QMAN_PORTAL_PAACE);
 if (of_device_is_compatible(node, fsl,qman)) diff --
  git
   a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index
   8fc1a12..0edc
   100644
   --- a/drivers/iommu/fsl_pamu.h
   +++ b/drivers/iommu/fsl_pamu.h
   @@ -406,5 +406,6 @@ void get_ome_index(u32 *omi_index, struct device
   *dev);  int pamu_update_paace_stash(int liodn, u32 subwin, u32
   value); int pamu_disable_spaace(int liodn, u32 subwin);
u32 pamu_get_max_subwin_cnt(void);
   +void enable_default_dma_window(int liodn);
  
#endif  /* __FSL_PAMU_H */
   diff --git a/drivers/iommu/fsl_pamu_domain.c
   b/drivers/iommu/fsl_pamu_domain.c index 966ae70..dd6cafc 100644
   --- a/drivers/iommu/fsl_pamu_domain.c
   +++ b/drivers/iommu/fsl_pamu_domain.c
   @@ -340,17 +340,57 @@ static inline struct device_domain_info
   *find_domain(struct device *dev)
 return dev-archdata.iommu_domain;  }
  
   +/* Disable device DMA capability and enable default DMA window */
   +static void disable_device_dma(struct device_domain_info *info,
   + int enable_dma_window)
   +{
   +#ifdef CONFIG_PCI
   + if (info-dev-bus == pci_bus_type

RE: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices

2013-10-16 Thread Bhushan Bharat-R65777

 
   
   
 -Original Message-
 From: Sethi Varun-B16395
 Sent: Wednesday, October 16, 2013 4:53 PM
 To: j...@8bytes.org; io...@lists.linux-foundation.org; linuxppc-
 d...@lists.ozlabs.org; linux-ker...@vger.kernel.org; Yoder
 Stuart-B08248; Wood Scott-B07421; alex.william...@redhat.com;
 Bhushan
 Bharat-R65777
 Cc: Sethi Varun-B16395
 Subject: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for
 PCIe devices

 Once the PCIe device assigned to a guest VM (via VFIO) gets
 detached from the iommu domain (when guest terminates), its PAMU
 table entry is disabled. So, this would prevent the device from
 being used once it's
assigned back to the host.

 This patch allows for creation of a default DMA window
 corresponding to the device and subsequently enabling the PAMU
 table entry. Before we enable the entry, we ensure that the
 device's bus master capability is disabled (device quiesced).

 Signed-off-by: Varun Sethi varun.se...@freescale.com
 ---
  drivers/iommu/fsl_pamu.c|   43
  ---
-
  drivers/iommu/fsl_pamu.h|1 +
  drivers/iommu/fsl_pamu_domain.c |   46
---
  3 files changed, 78 insertions(+), 12 deletions(-)

 diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
 index
 cba0498..fb4a031 100644
 --- a/drivers/iommu/fsl_pamu.c
 +++ b/drivers/iommu/fsl_pamu.c
 @@ -225,6 +225,21 @@ static struct paace *pamu_get_spaace(struct
 paace *paace,
 u32 wnum)
   return spaace;
  }

 +/*
 + * Defaul PPAACE settings for an LIODN.
 + */
 +static void setup_default_ppaace(struct paace *ppaace) {
 + pamu_init_ppaace(ppaace);
 + /* window size is 2^(WSE+1) bytes */
 + set_bf(ppaace-addr_bitfields, PPAACE_AF_WSE, 35);
 + ppaace-wbah = 0;
 + set_bf(ppaace-addr_bitfields, PPAACE_AF_WBAL, 0);
 + set_bf(ppaace-impl_attr, PAACE_IA_ATM,
 + PAACE_ATM_NO_XLATE);
 + set_bf(ppaace-addr_bitfields, PAACE_AF_AP,
 + PAACE_AP_PERMS_ALL);
 +}
  /**
   * pamu_get_fspi_and_allocate() - Allocates fspi index and
 reserves
subwindows
   *required for primary PAACE in
  the
secondary
 @@ -253,6 +268,24 @@ static unsigned long
 pamu_get_fspi_and_allocate(u32
 subwin_cnt)
   return (spaace_addr - (unsigned long)spaact) / (sizeof(struct
 paace));  }

 +/* Reset the PAACE entry to the default state */ void
 +enable_default_dma_window(int liodn) {
 + struct paace *ppaace;
 +
 + ppaace = pamu_get_ppaace(liodn);
 + if (!ppaace) {
 + pr_debug(Invalid liodn entry\n);
 + return;
 + }
 +
 + memset(ppaace, 0, sizeof(struct paace));
 +
 + setup_default_ppaace(ppaace);
 + mb();
 + pamu_enable_liodn(liodn);
 +}
 +
  /* Release the subwindows reserved for a particular LIODN */
 void pamu_free_subwins(int liodn)  { @@ -752,15 +785,7 @@ static
 void __init
 setup_liodns(void)
   continue;
   }
   ppaace = pamu_get_ppaace(liodn);
 - pamu_init_ppaace(ppaace);
 - /* window size is 2^(WSE+1) bytes */
 - set_bf(ppaace-addr_bitfields, PPAACE_AF_WSE,
  35);
 - ppaace-wbah = 0;
 - set_bf(ppaace-addr_bitfields, PPAACE_AF_WBAL,
  0);
 - set_bf(ppaace-impl_attr, PAACE_IA_ATM,
 - PAACE_ATM_NO_XLATE);
 - set_bf(ppaace-addr_bitfields, PAACE_AF_AP,
 - PAACE_AP_PERMS_ALL);
 + setup_default_ppaace(ppaace);
   if (of_device_is_compatible(node, fsl,qman-
  portal))
   setup_qbman_paace(ppaace,
  QMAN_PORTAL_PAACE);
   if (of_device_is_compatible(node, fsl,qman))
  diff --
git
 a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index
 8fc1a12..0edc
 100644
 --- a/drivers/iommu/fsl_pamu.h
 +++ b/drivers/iommu/fsl_pamu.h
 @@ -406,5 +406,6 @@ void get_ome_index(u32 *omi_index, struct
 device *dev);  int pamu_update_paace_stash(int liodn, u32
 subwin,
 u32 value); int pamu_disable_spaace(int liodn, u32 subwin);
  u32 pamu_get_max_subwin_cnt(void);
 +void enable_default_dma_window(int liodn);

  #endif  /* __FSL_PAMU_H */
 diff --git a/drivers/iommu/fsl_pamu_domain.c
 b/drivers/iommu/fsl_pamu_domain.c index 966ae70..dd6cafc 100644
 --- a/drivers/iommu/fsl_pamu_domain.c
 +++ b/drivers/iommu

RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state and altivec idle

2013-10-16 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wang Dongsheng-B40534
 Sent: Thursday, October 17, 2013 8:16 AM
 To: Bhushan Bharat-R65777; Wood Scott-B07421
 Cc: linuxppc-dev@lists.ozlabs.org
 Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state and altivec
 idle
 
 
 
  -Original Message-
  From: Bhushan Bharat-R65777
  Sent: Thursday, October 17, 2013 1:01 AM
  To: Wang Dongsheng-B40534; Wood Scott-B07421
  Cc: linuxppc-dev@lists.ozlabs.org
  Subject: RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state and
  altivec idle
 
 
 
   -Original Message-
   From: Wang Dongsheng-B40534
   Sent: Tuesday, October 15, 2013 2:51 PM
   To: Wood Scott-B07421
   Cc: Bhushan Bharat-R65777; linuxppc-dev@lists.ozlabs.org; Wang
  Dongsheng-B40534
   Subject: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state and
  altivec idle
  
   From: Wang Dongsheng dongsheng.w...@freescale.com
  
   Add a sysfs interface to enable/disable pw20 state or altivec idle, and
  control the
   wait entry time.
  
   Enable/Disable interface:
   0, disable. 1, enable.
   /sys/devices/system/cpu/cpuX/pw20_state
   /sys/devices/system/cpu/cpuX/altivec_idle
  
   Set wait time interface:(Nanosecond)
   /sys/devices/system/cpu/cpuX/pw20_wait_time
   /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
   Example: Based on a TB frequency of 41MHz.
   1~48(ns): TB[63]
   49~97(ns): TB[62]
   98~195(ns): TB[61]
   196~390(ns): TB[60]
   391~780(ns): TB[59]
   781~1560(ns): TB[58]
   ...
  
   Signed-off-by: Wang Dongsheng dongsheng.w...@freescale.com
   ---
   *v5:
   Change get_idle_ticks_bit function implementation.
  
   *v4:
   Move code from 85xx/common.c to kernel/sysfs.c.
  
   Remove has_pw20_altivec_idle function.
  
   Change wait entry_bit to wait time.
  
   diff --git a/arch/powerpc/kernel/sysfs.c
   b/arch/powerpc/kernel/sysfs.c
  index
   27a90b9..10d1128 100644
   --- a/arch/powerpc/kernel/sysfs.c
   +++ b/arch/powerpc/kernel/sysfs.c
   @@ -85,6 +85,284 @@ __setup(smt-snooze-delay=,
  setup_smt_snooze_delay);
  
#endif /* CONFIG_PPC64 */
  
   +#ifdef CONFIG_FSL_SOC
   +#define MAX_BIT  63
   +
   +static u64 pw20_wt;
   +static u64 altivec_idle_wt;
   +
   +static unsigned int get_idle_ticks_bit(u64 ns) {
   + u64 cycle;
   +
   + if (ns = 1)
   + cycle = div_u64(ns + 500, 1000) * tb_ticks_per_usec;
   + else
   + cycle = div_u64(ns * tb_ticks_per_usec, 1000);
   +
   + if (!cycle)
   + return 0;
   +
   + return ilog2(cycle);
   +}
   +
   +static void do_show_pwrmgtcr0(void *val) {
   + u32 *value = val;
   +
   + *value = mfspr(SPRN_PWRMGTCR0);
   +}
   +
   +static ssize_t show_pw20_state(struct device *dev,
   + struct device_attribute *attr, char *buf) {
   + u32 value;
   + unsigned int cpu = dev-id;
   +
   + smp_call_function_single(cpu, do_show_pwrmgtcr0, value, 1);
   +
   + value = PWRMGTCR0_PW20_WAIT;
   +
   + return sprintf(buf, %u\n, value ? 1 : 0); }
   +
   +static void do_store_pw20_state(void *val) {
   + u32 *value = val;
   + u32 pw20_state;
   +
   + pw20_state = mfspr(SPRN_PWRMGTCR0);
   +
   + if (*value)
   + pw20_state |= PWRMGTCR0_PW20_WAIT;
   + else
   + pw20_state = ~PWRMGTCR0_PW20_WAIT;
   +
   + mtspr(SPRN_PWRMGTCR0, pw20_state); }
   +
   +static ssize_t store_pw20_state(struct device *dev,
   + struct device_attribute *attr,
   + const char *buf, size_t count)
   +{
   + u32 value;
   + unsigned int cpu = dev-id;
   +
   + if (kstrtou32(buf, 0, value))
   + return -EINVAL;
   +
   + if (value  1)
   + return -EINVAL;
   +
   + smp_call_function_single(cpu, do_store_pw20_state, value, 1);
   +
   + return count;
   +}
   +
   +static ssize_t show_pw20_wait_time(struct device *dev,
   + struct device_attribute *attr, char *buf) {
   + u32 value;
   + u64 tb_cycle;
   + s64 time;
   +
   + unsigned int cpu = dev-id;
   +
   + if (!pw20_wt) {
   + smp_call_function_single(cpu, do_show_pwrmgtcr0, value, 1);
   + value = (value  PWRMGTCR0_PW20_ENT) 
   + PWRMGTCR0_PW20_ENT_SHIFT;
   +
   + tb_cycle = (1  (MAX_BIT - value)) * 2;
 
  Is value = 0 and value = 1 legal? These will make tb_cycle = 0,
 
   + time = div_u64(tb_cycle * 1000, tb_ticks_per_usec) - 1;
 
  And time = -1;
 
 Please look at the end of the function, :)
 
 return sprintf(buf, %llu\n, time  0 ? time : 0);

I know you return 0 if value = 0/1; my question was: is this correct as 
per the specification?

Ahh, also for values up to 7 you will return 0, no?

-Bharat

 
 -dongsheng
 
 
   + } else {
   + time = pw20_wt;
   + }
   +
   + return sprintf(buf, %llu\n, time  0 ? time : 0);
   }
   +


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: RFC: (re-)binding the VFIO platform driver to a platform device

2013-10-10 Thread Bhushan Bharat-R65777


> -Original Message-
> From: Wood Scott-B07421
> Sent: Thursday, October 10, 2013 8:53 PM
> To: Bhushan Bharat-R65777
> Cc: Wood Scott-B07421; Yoder Stuart-B08248; Kim Phillips; Christoffer Dall; 
> Alex
> Williamson; linux-kernel@vger.kernel.org; a.mota...@virtualopensystems.com;
> ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
> santosh.shu...@linaro.org; k...@vger.kernel.org; gre...@linuxfoundation.org
> Subject: Re: RFC: (re-)binding the VFIO platform driver to a platform device
> 
> On Thu, 2013-10-10 at 02:45 -0500, Bhushan Bharat-R65777 wrote:
> >
> > > -Original Message-
> > > From: Wood Scott-B07421
> > > Sent: Thursday, October 10, 2013 1:33 AM
> > > To: Yoder Stuart-B08248
> > > Cc: Wood Scott-B07421; Kim Phillips; Christoffer Dall; Alex
> > > Williamson; linux- ker...@vger.kernel.org;
> > > a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
> > > Bhushan Bharat-R65777; peter.mayd...@linaro.org;
> > > santosh.shu...@linaro.org; k...@vger.kernel.org;
> > > gre...@linuxfoundation.org
> > > Subject: Re: RFC: (re-)binding the VFIO platform driver to a
> > > platform device
> > >
> > > On Wed, 2013-10-09 at 14:44 -0500, Yoder Stuart-B08248 wrote:
> > > > Ah, think I understand now...yes that works as well, and would be
> > > > less intrusive.   So are you writing a patch? :)
> > >
> > > I've been meaning to since the previous round of discussion, but I've been
> busy.
> > > Would someone else be able to test it in the context of using it for VFIO?
> >
> > I wish I could have but I do not have vfio-platform stuff.
> 
> VFIO PCI without new_id would also be a useful test.

I will do that :)

-Bharat

> 
> -Scott
> 



RE: RFC: (re-)binding the VFIO platform driver to a platform device

2013-10-10 Thread Bhushan Bharat-R65777


> -Original Message-
> From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf 
> Of
> Kim Phillips
> Sent: Thursday, October 10, 2013 8:36 AM
> To: Wood Scott-B07421
> Cc: Yoder Stuart-B08248; Wood Scott-B07421; christoffer.d...@linaro.org;
> alex.william...@redhat.com; linux-kernel@vger.kernel.org;
> a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395; Bhushan
> Bharat-R65777; peter.mayd...@linaro.org; santosh.shu...@linaro.org;
> k...@vger.kernel.org; gre...@linuxfoundation.org
> Subject: Re: RFC: (re-)binding the VFIO platform driver to a platform device
> 
> On Wed, 9 Oct 2013 15:03:19 -0500
> Scott Wood  wrote:
> 
> > On Wed, 2013-10-09 at 14:44 -0500, Yoder Stuart-B08248 wrote:
> > > > From: Wood Scott-B07421
> > > > Sent: Wednesday, October 09, 2013 2:22 PM
> > > >
> > > > On Wed, 2013-10-09 at 14:02 -0500, Yoder Stuart-B08248 wrote:
> > > > > Have been thinking about this issue some more.  As Scott
> > > > > mentioned,
> 
> thanks for bringing this up again.
> 
> > > > There's already a "bool suppress_bind_attrs" to prevent sysfs
> > > > bind/unbind.  I suggested a similar flag to mean the opposite --
> > > > bind
> > > > *only* through sysfs.  Greg KH was skeptical and wanted to see a
> > > > patch before any further discussion.
> > >
> > > Ah, think I understand now...yes that works as well, and would be
> > > less intrusive.   So are you writing a patch? :)
> >
> > I've been meaning to since the previous round of discussion, but I've
> > been busy.  Would someone else be able to test it in the context of
> > using it for VFIO?
> 
> yes - see below.
> 
> > Otherwise, that looks about right, for the driver side (though
> > driver_attach could error out earlier rather than testing it inside
> > the loop).
> 
> I've made the changes you suggested and tested the resulting diff below on an
> arndale board.  I successfully performed the following sequence of commands
> after first changing the i2c@12C8 node in the device tree to be 
> exclusively
> compatible with "vfio":
> 
> ===
> # ls -l /sys/bus/platform/drivers/vfio-platform/
> total 0
> --w--- 1 root root 4096 Sep 24 19:17 bind
> --w--- 1 root root 4096 Sep 24 19:13 uevent
> --w--- 1 root root 4096 Sep 24 19:18 unbind # ls -l
> /sys/bus/platform/drivers/s3c-i2c total 0
> lrwxrwxrwx 1 root root0 Sep 24 19:11 12c6.i2c ->
> ../../../../devices/12c6.i2c
> lrwxrwxrwx 1 root root0 Sep 24 19:11 12c9.i2c ->
> ../../../../devices/12c9.i2c
> lrwxrwxrwx 1 root root0 Sep 24 19:20 12ce.i2c ->
> ../../../../devices/12ce.i2c
> --w--- 1 root root 4096 Sep 24 19:18 bind
> --w--- 1 root root 4096 Sep 24 19:11 uevent
> --w--- 1 root root 4096 Sep 24 19:17 unbind # ls -l
> /sys/devices/12c8.i2c/driver  # this is the one with the 'vfio' compatible
> ls: cannot access /sys/devices/12c8.i2c/driver: No such file or directory 
> #
> ls -l /sys/devices/12ce.i2c/driver lrwxrwxrwx 1 root root 0 Sep 24 19:18
> /sys/devices/12ce.i2c/driver -> ../../bus/platform/drivers/s3c-i2c
> # echo 12ce.i2c > /sys/bus/platform/drivers/s3c-i2c/unbind
> # ls -l /sys/devices/12ce.i2c/driver
> ls: cannot access /sys/devices/12ce.i2c/driver: No such file or directory 
> #
> echo 12ce.i2c > /sys/bus/platform/drivers/vfio-platform/bind
> # ls -l /sys/devices/12ce.i2c/driver lrwxrwxrwx 1 root root 0 Sep 24 19:21
> /sys/devices/12ce.i2c/driver -> ../../bus/platform/drivers/vfio-platform
> # echo 12ce.i2c > /sys/bus/platform/drivers/vfio-platform/unbind
> # ls -l /sys/devices/12ce.i2c/driver # echo 12ce.i2c >
> /sys/bus/platform/drivers/s3c-i2c/bind
> [  722.137524] s3c-i2c 12ce.i2c: slave address 0x38 [  722.141037] s3c-i2c
> 12ce.i2c: bus frequency set to 65 KHz [  722.150605] s3c-i2c 12ce.i2c:
> i2c-8: S3C I2C adapter # ls -l /sys/devices/12ce.i2c/driver lrwxrwxrwx 1
> root root 0 Sep 24 19:21 /sys/devices/12ce.i2c/driver ->
> ../../bus/platform/drivers/s3c-i2c
> #
> 
> 
> so it's correctly not allowing 'vfio' driver to bind to a device tree 
> compatible
> it's declared, and it then can bind the i2c @ 12ce device to the vfio-
> platform driver, and unbind and bind it back to the i2c driver.
> 
> For clarity's sake, before this diff, the command:
> 
> echo 12ce.i2c > /sys/bus/platform/drivers/vfio-platform/bind
> 
> would error with:
> 
> echo: write error: No such device
> 
> > The other half 

RE: RFC: (re-)binding the VFIO platform driver to a platform device

2013-10-10 Thread Bhushan Bharat-R65777


> -Original Message-
> From: Wood Scott-B07421
> Sent: Thursday, October 10, 2013 1:33 AM
> To: Yoder Stuart-B08248
> Cc: Wood Scott-B07421; Kim Phillips; Christoffer Dall; Alex Williamson; linux-
> ker...@vger.kernel.org; a.mota...@virtualopensystems.com; ag...@suse.de; Sethi
> Varun-B16395; Bhushan Bharat-R65777; peter.mayd...@linaro.org;
> santosh.shu...@linaro.org; k...@vger.kernel.org; gre...@linuxfoundation.org
> Subject: Re: RFC: (re-)binding the VFIO platform driver to a platform device
> 
> On Wed, 2013-10-09 at 14:44 -0500, Yoder Stuart-B08248 wrote:
> >
> > > -Original Message-
> > > From: Wood Scott-B07421
> > > Sent: Wednesday, October 09, 2013 2:22 PM
> > > To: Yoder Stuart-B08248
> > > Cc: Wood Scott-B07421; Kim Phillips; Christoffer Dall; Alex
> > > Williamson; linux-kernel@vger.kernel.org;
> > > a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
> > > Bhushan Bharat-R65777; peter.mayd...@linaro.org;
> > > santosh.shu...@linaro.org; k...@vger.kernel.org;
> > > gre...@linuxfoundation.org
> > > Subject: Re: RFC: (re-)binding the VFIO platform driver to a
> > > platform device
> > >
> > > On Wed, 2013-10-09 at 14:02 -0500, Yoder Stuart-B08248 wrote:
> > > > Have been thinking about this issue some more.  As Scott
> > > > mentioned, 'wildcard' matching for a driver can be fairly done in
> > > > the platform bus driver.  We could add a new flag to the platform driver
> struct:
> > > >
> > > > diff --git a/drivers/base/platform.c b/drivers/base/platform.c
> > > > index 4f8bef3..4d6cf14 100644
> > > > --- a/drivers/base/platform.c
> > > > +++ b/drivers/base/platform.c
> > > > @@ -727,6 +727,10 @@ static int platform_match(struct device *dev,
> > > struct device_driver *drv)
> > > > struct platform_device *pdev = to_platform_device(dev);
> > > > struct platform_driver *pdrv = to_platform_driver(drv);
> > > >
> > > > +   /* the driver matches any device */
> > > > +   if (pdrv->match_any)
> > > > +   return 1;
> > > > +
> > > > /* Attempt an OF style match first */
> > > > if (of_driver_match_device(dev, drv))
> > > > return 1;
> > > >
> > > > However, the more problematic issue is that a bus driver has no
> > > > way to differentiate from an explicit bind request via sysfs and a
> > > > bind that happened through bus probing.
> > >
> > > Again, I think the wildcard match should be orthogonal to "don't
> > > bind by default" as far as the mechanism goes.
> > >
> > > There's already a "bool suppress_bind_attrs" to prevent sysfs
> > > bind/unbind.  I suggested a similar flag to mean the opposite --
> > > bind
> > > *only* through sysfs.  Greg KH was skeptical and wanted to see a
> > > patch before any further discussion.
> >
> > Ah, think I understand now...yes that works as well, and would be
> > less intrusive.   So are you writing a patch? :)
> 
> I've been meaning to since the previous round of discussion, but I've been 
> busy.
> Would someone else be able to test it in the context of using it for VFIO?

I wish I could have but I do not have vfio-platform stuff. 

> 
> > It would be something like this, right?
> >
> > diff --git a/drivers/base/dd.c b/drivers/base/dd.c index
> > 35fa368..c9a61ea 100644
> > --- a/drivers/base/dd.c
> > +++ b/drivers/base/dd.c
> > @@ -389,7 +389,7 @@ static int __device_attach(struct device_driver
> > *drv, void *data)  {
> > struct device *dev = data;
> >
> > -   if (!driver_match_device(drv, dev))
> > +   if (!drv->explicit_bind_only && !driver_match_device(drv,
> > + dev))
> > return 0;
> 
> if (drv->explicit_bind_only || !driver_match_device(drv, dev))
>   return 0;

Scott, 
I am trying to understand what you are proposing here (for example, "DEVICE" can be 
handled by "DRIVER1" and "VFIO-PLATFORM-DRIVER"):
 - By default drv->explicit_bind_only will be clear in all drivers.
 - By default device->explicit_bind_only will also be clear for all devices.
 - On boot, matching devices will be bound to the respective driver (DEVICE <==> 
DRIVER1).
   This will never be bound to VFIO-PLATFORM-DRIVER. So far same as before.
 - Via the sysfs interface, set drv->explicit_bind_only for VFIO-PLATFORM-DRIVER.
 - T

RE: RFC: (re-)binding the VFIO platform driver to a platform device

2013-10-10 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Thursday, October 10, 2013 1:33 AM
 To: Yoder Stuart-B08248
 Cc: Wood Scott-B07421; Kim Phillips; Christoffer Dall; Alex Williamson; linux-
 ker...@vger.kernel.org; a.mota...@virtualopensystems.com; ag...@suse.de; Sethi
 Varun-B16395; Bhushan Bharat-R65777; peter.mayd...@linaro.org;
 santosh.shu...@linaro.org; k...@vger.kernel.org; gre...@linuxfoundation.org
 Subject: Re: RFC: (re-)binding the VFIO platform driver to a platform device
 
 On Wed, 2013-10-09 at 14:44 -0500, Yoder Stuart-B08248 wrote:
 
   -Original Message-
   From: Wood Scott-B07421
   Sent: Wednesday, October 09, 2013 2:22 PM
   To: Yoder Stuart-B08248
   Cc: Wood Scott-B07421; Kim Phillips; Christoffer Dall; Alex
   Williamson; linux-kernel@vger.kernel.org;
   a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
   Bhushan Bharat-R65777; peter.mayd...@linaro.org;
   santosh.shu...@linaro.org; k...@vger.kernel.org;
   gre...@linuxfoundation.org
   Subject: Re: RFC: (re-)binding the VFIO platform driver to a
   platform device
  
   On Wed, 2013-10-09 at 14:02 -0500, Yoder Stuart-B08248 wrote:
Have been thinking about this issue some more.  As Scott
mentioned, 'wildcard' matching for a driver can be fairly done in
the platform bus driver.  We could add a new flag to the platform driver
 struct:
   
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 4f8bef3..4d6cf14 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -727,6 +727,10 @@ static int platform_match(struct device *dev,
   struct device_driver *drv)
struct platform_device *pdev = to_platform_device(dev);
struct platform_driver *pdrv = to_platform_driver(drv);
   
+   /* the driver matches any device */
+   if (pdrv->match_any)
+   return 1;
+
/* Attempt an OF style match first */
if (of_driver_match_device(dev, drv))
return 1;
   
However, the more problematic issue is that a bus driver has no
way to differentiate from an explicit bind request via sysfs and a
bind that happened through bus probing.
  
   Again, I think the wildcard match should be orthogonal to "don't
   bind by default" as far as the mechanism goes.
  
   There's already a "bool suppress_bind_attrs" to prevent sysfs
   bind/unbind.  I suggested a similar flag to mean the opposite --
   bind
   *only* through sysfs.  Greg KH was skeptical and wanted to see a
   patch before any further discussion.
 
  Ah, think I understand now...yes that works as well, and would be
  less intrusive.   So are you writing a patch? :)
 
 I've been meaning to since the previous round of discussion, but I've been 
 busy.
 Would someone else be able to test it in the context of using it for VFIO?

I wish I could have but I do not have vfio-platform stuff. 

 
  It would be something like this, right?
 
  diff --git a/drivers/base/dd.c b/drivers/base/dd.c index
  35fa368..c9a61ea 100644
  --- a/drivers/base/dd.c
  +++ b/drivers/base/dd.c
  @@ -389,7 +389,7 @@ static int __device_attach(struct device_driver
  *drv, void *data)  {
  struct device *dev = data;
 
  -   if (!driver_match_device(drv, dev))
  +   if (!drv->explicit_bind_only && !driver_match_device(drv,
  + dev))
  return 0;
 
 if (drv->explicit_bind_only || !driver_match_device(drv, dev))
   return 0;

Scott, 
I am trying to understand what you are proposing here (for example, "DEVICE" can be 
handled by "DRIVER1" and "VFIO-PLATFORM-DRIVER"):
 - By default drv->explicit_bind_only will be clear in all drivers.
 - By default device->explicit_bind_only will also be clear for all devices.
 - On boot, matching devices will be bound to the respective driver (DEVICE <==> 
DRIVER1).
   This will never be bound to VFIO-PLATFORM-DRIVER. So far same as before.
 - Via the sysfs interface, set drv->explicit_bind_only for VFIO-PLATFORM-DRIVER.
 - Then, for the devices the user wants, set device->explicit_bind_only.
 - Unbind DEVICE from DRIVER1.
 - Bind DEVICE to VFIO-PLATFORM-DRIVER. This time it will be successful 
because (device->explicit_bind_only && drv->explicit_bind_only) is set.
 - When done, unbind the DEVICE from VFIO-PLATFORM-DRIVER.
 - Now the user can re-bind the device to either DRIVER1 or VFIO-PLATFORM-DRIVER.
 - Once drv->explicit_bind_only is set in VFIO-PLATFORM-DRIVER and a new 
device arrives (device hotplug), it can get bound to the matching driver and not 
to VFIO-PLATFORM-DRIVER.

This looks ok to me :)

Thanks
-Bharat
 
  return driver_probe_device(drv, dev); @@ -450,7 +450,7 @@
  static int __driver_attach(struct device *dev, void *data)
   * is an error.
   */
 
  -   if (!driver_match_device(drv, dev))
  +   if (!drv->explicit_bind_only && !driver_match_device(drv,
  + dev))
  return 0;
 
 Likewise -- or error out earlier in driver_attach().
 
 Otherwise

RE: RFC: (re-)binding the VFIO platform driver to a platform device

2013-10-10 Thread Bhushan Bharat-R65777


 -Original Message-
 From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf 
 Of
 Kim Phillips
 Sent: Thursday, October 10, 2013 8:36 AM
 To: Wood Scott-B07421
 Cc: Yoder Stuart-B08248; Wood Scott-B07421; christoffer.d...@linaro.org;
 alex.william...@redhat.com; linux-kernel@vger.kernel.org;
 a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395; Bhushan
 Bharat-R65777; peter.mayd...@linaro.org; santosh.shu...@linaro.org;
 k...@vger.kernel.org; gre...@linuxfoundation.org
 Subject: Re: RFC: (re-)binding the VFIO platform driver to a platform device
 
 On Wed, 9 Oct 2013 15:03:19 -0500
 Scott Wood scottw...@freescale.com wrote:
 
  On Wed, 2013-10-09 at 14:44 -0500, Yoder Stuart-B08248 wrote:
From: Wood Scott-B07421
Sent: Wednesday, October 09, 2013 2:22 PM
   
On Wed, 2013-10-09 at 14:02 -0500, Yoder Stuart-B08248 wrote:
 Have been thinking about this issue some more.  As Scott
 mentioned,
 
 thanks for bringing this up again.
 
There's already a "bool suppress_bind_attrs" to prevent sysfs
bind/unbind.  I suggested a similar flag to mean the opposite --
bind
*only* through sysfs.  Greg KH was skeptical and wanted to see a
patch before any further discussion.
  
   Ah, think I understand now...yes that works as well, and would be
   less intrusive.   So are you writing a patch? :)
 
  I've been meaning to since the previous round of discussion, but I've
  been busy.  Would someone else be able to test it in the context of
  using it for VFIO?
 
 yes - see below.
 
  Otherwise, that looks about right, for the driver side (though
  driver_attach could error out earlier rather than testing it inside
  the loop).
 
 I've made the changes you suggested and tested the resulting diff below on an
 arndale board.  I successfully performed the following sequence of commands
 after first changing the i2c@12C8 node in the device tree to be 
 exclusively
 compatible with vfio:
 
 ===
 # ls -l /sys/bus/platform/drivers/vfio-platform/
 total 0
 --w--- 1 root root 4096 Sep 24 19:17 bind
 --w--- 1 root root 4096 Sep 24 19:13 uevent
 --w--- 1 root root 4096 Sep 24 19:18 unbind # ls -l
 /sys/bus/platform/drivers/s3c-i2c total 0
 lrwxrwxrwx 1 root root0 Sep 24 19:11 12c6.i2c ->
 ../../../../devices/12c6.i2c
 lrwxrwxrwx 1 root root0 Sep 24 19:11 12c9.i2c ->
 ../../../../devices/12c9.i2c
 lrwxrwxrwx 1 root root0 Sep 24 19:20 12ce.i2c ->
 ../../../../devices/12ce.i2c
 --w--- 1 root root 4096 Sep 24 19:18 bind
 --w--- 1 root root 4096 Sep 24 19:11 uevent
 --w--- 1 root root 4096 Sep 24 19:17 unbind # ls -l
 /sys/devices/12c8.i2c/driver  # this is the one with the 'vfio' compatible
 ls: cannot access /sys/devices/12c8.i2c/driver: No such file or directory 
 #
 ls -l /sys/devices/12ce.i2c/driver lrwxrwxrwx 1 root root 0 Sep 24 19:18
 /sys/devices/12ce.i2c/driver -> ../../bus/platform/drivers/s3c-i2c
 # echo 12ce.i2c > /sys/bus/platform/drivers/s3c-i2c/unbind
 # ls -l /sys/devices/12ce.i2c/driver
 ls: cannot access /sys/devices/12ce.i2c/driver: No such file or directory 
 #
 echo 12ce.i2c > /sys/bus/platform/drivers/vfio-platform/bind
 # ls -l /sys/devices/12ce.i2c/driver lrwxrwxrwx 1 root root 0 Sep 24 19:21
 /sys/devices/12ce.i2c/driver -> ../../bus/platform/drivers/vfio-platform
 # echo 12ce.i2c > /sys/bus/platform/drivers/vfio-platform/unbind
 # ls -l /sys/devices/12ce.i2c/driver # echo 12ce.i2c >
 /sys/bus/platform/drivers/s3c-i2c/bind
 [  722.137524] s3c-i2c 12ce.i2c: slave address 0x38 [  722.141037] s3c-i2c
 12ce.i2c: bus frequency set to 65 KHz [  722.150605] s3c-i2c 12ce.i2c:
 i2c-8: S3C I2C adapter # ls -l /sys/devices/12ce.i2c/driver lrwxrwxrwx 1
 root root 0 Sep 24 19:21 /sys/devices/12ce.i2c/driver ->
 ../../bus/platform/drivers/s3c-i2c
 #
 
 
 so it's correctly not allowing 'vfio' driver to bind to a device tree 
 compatible
 it's declared, and it then can bind the i2c @ 12ce device to the vfio-
 platform driver, and unbind and bind it back to the i2c driver.
 
 For clarity's sake, before this diff, the command:
 
 echo 12ce.i2c > /sys/bus/platform/drivers/vfio-platform/bind
 
 would error with:
 
 echo: write error: No such device
 
  The other half of fixing the raciness is to ensure that the device
  doesn't get bound back to a non-VFIO driver (e.g. due to a module load
  or new_id).  The solution I proposed for that was a similar
  explicit-bind-only flag for a device, that the user sets through sysfs
  prior to unbinding.  This would also be useful in non-VFIO contexts to
  simply say I don't want to use this device at all.
 
 I can take a look at doing this if you're still busy.
 
 Thanks,
 
 Kim
 
 diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 73f6c29..da81442
 100644
 --- a/drivers/base/bus.c
 +++ b/drivers/base/bus.c
 @@ -201,7 +201,8 @@ static ssize_t bind_store(struct

RE: RFC: (re-)binding the VFIO platform driver to a platform device

2013-10-10 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Thursday, October 10, 2013 8:53 PM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; Yoder Stuart-B08248; Kim Phillips; Christoffer Dall; 
 Alex
 Williamson; linux-kernel@vger.kernel.org; a.mota...@virtualopensystems.com;
 ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
 santosh.shu...@linaro.org; k...@vger.kernel.org; gre...@linuxfoundation.org
 Subject: Re: RFC: (re-)binding the VFIO platform driver to a platform device
 
 On Thu, 2013-10-10 at 02:45 -0500, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: Wood Scott-B07421
   Sent: Thursday, October 10, 2013 1:33 AM
   To: Yoder Stuart-B08248
   Cc: Wood Scott-B07421; Kim Phillips; Christoffer Dall; Alex
   Williamson; linux- ker...@vger.kernel.org;
   a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
   Bhushan Bharat-R65777; peter.mayd...@linaro.org;
   santosh.shu...@linaro.org; k...@vger.kernel.org;
   gre...@linuxfoundation.org
   Subject: Re: RFC: (re-)binding the VFIO platform driver to a
   platform device
  
   On Wed, 2013-10-09 at 14:44 -0500, Yoder Stuart-B08248 wrote:
Ah, think I understand now...yes that works as well, and would be
less intrusive.   So are you writing a patch? :)
  
   I've been meaning to since the previous round of discussion, but I've been
 busy.
   Would someone else be able to test it in the context of using it for VFIO?
 
  I wish I could have but I do not have vfio-platform stuff.
 
 VFIO PCI without new_id would also be a useful test.

I will do that :)

-Bharat

 
 -Scott
 



RE: RFC: (re-)binding the VFIO platform driver to a platform device

2013-10-10 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Thursday, October 10, 2013 1:33 AM
 To: Yoder Stuart-B08248
 Cc: Wood Scott-B07421; Kim Phillips; Christoffer Dall; Alex Williamson; linux-
 ker...@vger.kernel.org; a.mota...@virtualopensystems.com; ag...@suse.de; Sethi
 Varun-B16395; Bhushan Bharat-R65777; peter.mayd...@linaro.org;
 santosh.shu...@linaro.org; kvm@vger.kernel.org; gre...@linuxfoundation.org
 Subject: Re: RFC: (re-)binding the VFIO platform driver to a platform device
 
 On Wed, 2013-10-09 at 14:44 -0500, Yoder Stuart-B08248 wrote:
 
   -Original Message-
   From: Wood Scott-B07421
   Sent: Wednesday, October 09, 2013 2:22 PM
   To: Yoder Stuart-B08248
   Cc: Wood Scott-B07421; Kim Phillips; Christoffer Dall; Alex
   Williamson; linux-ker...@vger.kernel.org;
   a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
   Bhushan Bharat-R65777; peter.mayd...@linaro.org;
   santosh.shu...@linaro.org; kvm@vger.kernel.org;
   gre...@linuxfoundation.org
   Subject: Re: RFC: (re-)binding the VFIO platform driver to a
   platform device
  
   On Wed, 2013-10-09 at 14:02 -0500, Yoder Stuart-B08248 wrote:
Have been thinking about this issue some more.  As Scott
mentioned, 'wildcard' matching for a driver can be fairly done in
the platform bus driver.  We could add a new flag to the platform driver
 struct:
   
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 4f8bef3..4d6cf14 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -727,6 +727,10 @@ static int platform_match(struct device *dev,
   struct device_driver *drv)
struct platform_device *pdev = to_platform_device(dev);
struct platform_driver *pdrv = to_platform_driver(drv);
   
+   /* the driver matches any device */
+   if (pdrv->match_any)
+   return 1;
+
/* Attempt an OF style match first */
if (of_driver_match_device(dev, drv))
return 1;
   
However, the more problematic issue is that a bus driver has no
way to differentiate from an explicit bind request via sysfs and a
bind that happened through bus probing.
  
   Again, I think the wildcard match should be orthogonal to "don't
   bind by default" as far as the mechanism goes.
  
   There's already a bool suppress_bind_attrs to prevent sysfs
   bind/unbind.  I suggested a similar flag to mean the opposite --
   bind
   *only* through sysfs.  Greg KH was skeptical and wanted to see a
   patch before any further discussion.
 
  Ah, think I understand now...yes that works as well, and would be
  less intrusive.   So are you writing a patch? :)
 
 I've been meaning to since the previous round of discussion, but I've been 
 busy.
 Would someone else be able to test it in the context of using it for VFIO?

I wish I could have but I do not have vfio-platform stuff. 

 
  It would be something like this, right?
 
  diff --git a/drivers/base/dd.c b/drivers/base/dd.c index
  35fa368..c9a61ea 100644
  --- a/drivers/base/dd.c
  +++ b/drivers/base/dd.c
  @@ -389,7 +389,7 @@ static int __device_attach(struct device_driver
  *drv, void *data)  {
  struct device *dev = data;
 
  -   if (!driver_match_device(drv, dev))
  +   if (!drv->explicit_bind_only && !driver_match_device(drv,
  + dev))
  return 0;
 
 if (drv->explicit_bind_only || !driver_match_device(drv, dev))
   return 0;

Scott, 
I am trying to understand what you are proposing here (for example, DEVICE can
be handled by DRIVER1 and VFIO-PLATFORM-DRIVER):
 - By default drv->explicit_bind_only will be clear in all drivers.
 - By default device->explicit_bind_only will also be clear for all devices.
 - On boot, matching devices will be bound to the respective driver (DEVICE ==
DRIVER1).
   This will never be bound to VFIO-PLATFORM-DRIVER. So far this is the same as before.
 - Via the sysfs interface, set drv->explicit_bind_only for VFIO-PLATFORM-DRIVER.
 - Then, for the devices the user wants, set device->explicit_bind_only.
 - Unbind DEVICE from DRIVER1.
 - Bind DEVICE to VFIO-PLATFORM-DRIVER. This time it will be successful
because (device->explicit_bind_only && drv->explicit_bind_only) is set.
 - When done, unbind DEVICE from VFIO-PLATFORM-DRIVER.
 - Now the user can re-bind the device to either DRIVER1 or VFIO-PLATFORM-DRIVER.
 - Once drv->explicit_bind_only is set in VFIO-PLATFORM-DRIVER and a new
device arrives (device hotplug), it gets bound to the matching driver and not
to VFIO-PLATFORM-DRIVER.

This looks ok to me :)

Thanks
-Bharat
 
  return driver_probe_device(drv, dev); @@ -450,7 +450,7 @@
  static int __driver_attach(struct device *dev, void *data)
   * is an error.
   */
 
  -   if (!driver_match_device(drv, dev))
  +   if (!drv->explicit_bind_only && !driver_match_device(drv,
  + dev))
  return 0;
 
 Likewise -- or error out earlier in driver_attach().
 
 Otherwise

RE: RFC: (re-)binding the VFIO platform driver to a platform device

2013-10-10 Thread Bhushan Bharat-R65777


 -Original Message-
 From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf 
 Of
 Kim Phillips
 Sent: Thursday, October 10, 2013 8:36 AM
 To: Wood Scott-B07421
 Cc: Yoder Stuart-B08248; Wood Scott-B07421; christoffer.d...@linaro.org;
 alex.william...@redhat.com; linux-ker...@vger.kernel.org;
 a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395; Bhushan
 Bharat-R65777; peter.mayd...@linaro.org; santosh.shu...@linaro.org;
 kvm@vger.kernel.org; gre...@linuxfoundation.org
 Subject: Re: RFC: (re-)binding the VFIO platform driver to a platform device
 
 On Wed, 9 Oct 2013 15:03:19 -0500
 Scott Wood scottw...@freescale.com wrote:
 
  On Wed, 2013-10-09 at 14:44 -0500, Yoder Stuart-B08248 wrote:
From: Wood Scott-B07421
Sent: Wednesday, October 09, 2013 2:22 PM
   
On Wed, 2013-10-09 at 14:02 -0500, Yoder Stuart-B08248 wrote:
 Have been thinking about this issue some more.  As Scott
 mentioned,
 
 thanks for bringing this up again.
 
There's already a bool suppress_bind_attrs to prevent sysfs
bind/unbind.  I suggested a similar flag to mean the opposite --
bind
*only* through sysfs.  Greg KH was skeptical and wanted to see a
patch before any further discussion.
  
   Ah, think I understand now...yes that works as well, and would be
   less intrusive.   So are you writing a patch? :)
 
  I've been meaning to since the previous round of discussion, but I've
  been busy.  Would someone else be able to test it in the context of
  using it for VFIO?
 
 yes - see below.
 
  Otherwise, that looks about right, for the driver side (though
  driver_attach could error out earlier rather than testing it inside
  the loop).
 
 I've made the changes you suggested and tested the resulting diff below on an
 arndale board.  I successfully performed the following sequence of commands
 after first changing the i2c@12C8 node in the device tree to be 
 exclusively
 compatible with vfio:
 
 ===
 # ls -l /sys/bus/platform/drivers/vfio-platform/
 total 0
 --w--- 1 root root 4096 Sep 24 19:17 bind
 --w--- 1 root root 4096 Sep 24 19:13 uevent
 --w--- 1 root root 4096 Sep 24 19:18 unbind # ls -l
 /sys/bus/platform/drivers/s3c-i2c total 0
 lrwxrwxrwx 1 root root0 Sep 24 19:11 12c6.i2c ->
 ../../../../devices/12c6.i2c
 lrwxrwxrwx 1 root root0 Sep 24 19:11 12c9.i2c ->
 ../../../../devices/12c9.i2c
 lrwxrwxrwx 1 root root0 Sep 24 19:20 12ce.i2c ->
 ../../../../devices/12ce.i2c
 --w--- 1 root root 4096 Sep 24 19:18 bind
 --w--- 1 root root 4096 Sep 24 19:11 uevent
 --w--- 1 root root 4096 Sep 24 19:17 unbind # ls -l
 /sys/devices/12c8.i2c/driver  # this is the one with the 'vfio' compatible
 ls: cannot access /sys/devices/12c8.i2c/driver: No such file or directory 
 #
 ls -l /sys/devices/12ce.i2c/driver lrwxrwxrwx 1 root root 0 Sep 24 19:18
 /sys/devices/12ce.i2c/driver -> ../../bus/platform/drivers/s3c-i2c
 # echo 12ce.i2c > /sys/bus/platform/drivers/s3c-i2c/unbind
 # ls -l /sys/devices/12ce.i2c/driver
 ls: cannot access /sys/devices/12ce.i2c/driver: No such file or directory 
 #
 echo 12ce.i2c > /sys/bus/platform/drivers/vfio-platform/bind
 # ls -l /sys/devices/12ce.i2c/driver lrwxrwxrwx 1 root root 0 Sep 24 19:21
 /sys/devices/12ce.i2c/driver -> ../../bus/platform/drivers/vfio-platform
 # echo 12ce.i2c > /sys/bus/platform/drivers/vfio-platform/unbind
 # ls -l /sys/devices/12ce.i2c/driver # echo 12ce.i2c >
 /sys/bus/platform/drivers/s3c-i2c/bind
 [  722.137524] s3c-i2c 12ce.i2c: slave address 0x38 [  722.141037] s3c-i2c
 12ce.i2c: bus frequency set to 65 KHz [  722.150605] s3c-i2c 12ce.i2c:
 i2c-8: S3C I2C adapter # ls -l /sys/devices/12ce.i2c/driver lrwxrwxrwx 1
 root root 0 Sep 24 19:21 /sys/devices/12ce.i2c/driver ->
 ../../bus/platform/drivers/s3c-i2c
 #
 
 
 so it's correctly not allowing 'vfio' driver to bind to a device tree 
 compatible
 it's declared, and it then can bind the i2c @ 12ce device to the vfio-
 platform driver, and unbind and bind it back to the i2c driver.
 
 For clarity's sake, before this diff, the command:
 
 echo 12ce.i2c > /sys/bus/platform/drivers/vfio-platform/bind
 
 would error with:
 
 echo: write error: No such device
 
  The other half of fixing the raciness is to ensure that the device
  doesn't get bound back to a non-VFIO driver (e.g. due to a module load
  or new_id).  The solution I proposed for that was a similar
  explicit-bind-only flag for a device, that the user sets through sysfs
  prior to unbinding.  This would also be useful in non-VFIO contexts to
  simply say I don't want to use this device at all.
 
 I can take a look at doing this if you're still busy.
 
 Thanks,
 
 Kim
 
 diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 73f6c29..da81442
 100644
 --- a/drivers/base/bus.c
 +++ b/drivers/base/bus.c
 @@ -201,7 +201,8 @@ static ssize_t bind_store(struct

RE: [PATCH 2/2] kvm: ppc: booke: check range page invalidation progress on page setup

2013-10-10 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Paolo Bonzini [mailto:paolo.bonz...@gmail.com] On Behalf Of Paolo 
 Bonzini
 Sent: Monday, October 07, 2013 5:35 PM
 To: Alexander Graf
 Cc: Bhushan Bharat-R65777; Paul Mackerras; Wood Scott-B07421; kvm-
 p...@vger.kernel.org; kvm@vger.kernel.org mailing list; Bhushan Bharat-R65777;
 Gleb Natapov
 Subject: Re: [PATCH 2/2] kvm: ppc: booke: check range page invalidation 
 progress
 on page setup
 
 Il 04/10/2013 15:38, Alexander Graf ha scritto:
 
  On 07.08.2013, at 12:03, Bharat Bhushan wrote:
 
  When the MM code is invalidating a range of pages, it calls the KVM
  kvm_mmu_notifier_invalidate_range_start() notifier function, which calls
  kvm_unmap_hva_range(), which arranges to flush all the TLBs for guest 
  pages.
  However, the Linux PTEs for the range being flushed are still valid at
  that point.  We are not supposed to establish any new references to pages
  in the range until the ...range_end() notifier gets called.
  The PPC-specific KVM code doesn't get any explicit notification of that;
  instead, we are supposed to use mmu_notifier_retry() to test whether we
  are or have been inside a range flush notifier pair while we have been
  referencing a page.
 
  This patch calls the mmu_notifier_retry() while mapping the guest
  page to ensure we are not referencing a page when in range invalidation.
 
  This call is inside a region locked with kvm->mmu_lock, which is the
  same lock that is called by the KVM MMU notifier functions, thus
  ensuring that no new notification can proceed while we are in the
  locked region.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 
  Acked-by: Alexander Graf ag...@suse.de
 
  Gleb, Paolo, please queue for 3.12 directly.
 
 Here is the backport.  The second hunk has a nontrivial conflict, so
 someone please give their {Tested,Reviewed,Compiled}-by.

{Compiled,Reviewed}-by: Bharat Bhushan bharat.bhus...@freescale.com

Thanks
-Bharat

 
 Paolo
 
 diff --git a/arch/powerpc/kvm/e500_mmu_host.c 
 b/arch/powerpc/kvm/e500_mmu_host.c
 index 1c6a9d7..c65593a 100644
 --- a/arch/powerpc/kvm/e500_mmu_host.c
 +++ b/arch/powerpc/kvm/e500_mmu_host.c
 @@ -332,6 +332,13 @@ static inline int kvmppc_e500_shadow_map(struct
 kvmppc_vcpu_e500 *vcpu_e500,
   unsigned long hva;
   int pfnmap = 0;
   int tsize = BOOK3E_PAGESZ_4K;
 + int ret = 0;
 + unsigned long mmu_seq;
 + struct kvm *kvm = vcpu_e500->vcpu.kvm;
 +
 + /* used to check for invalidations in progress */
 + mmu_seq = kvm->mmu_notifier_seq;
 + smp_rmb();
 
   /*
* Translate guest physical to true physical, acquiring
 @@ -449,6 +456,12 @@ static inline int kvmppc_e500_shadow_map(struct
 kvmppc_vcpu_e500 *vcpu_e500,
   gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
   }
 
 + spin_lock(&kvm->mmu_lock);
 + if (mmu_notifier_retry(kvm, mmu_seq)) {
 + ret = -EAGAIN;
 + goto out;
 + }
 +
   kvmppc_e500_ref_setup(ref, gtlbe, pfn);
 
   kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
 @@ -457,10 +470,13 @@ static inline int kvmppc_e500_shadow_map(struct
 kvmppc_vcpu_e500 *vcpu_e500,
   /* Clear i-cache for new pages */
   kvmppc_mmu_flush_icache(pfn);
 
 +out:
 + spin_unlock(&kvm->mmu_lock);
 +
   /* Drop refcount on page, so that mmu notifiers can clear it */
   kvm_release_pfn_clean(pfn);
 
 - return 0;
 + return ret;
  }
 
  /* XXX only map the one-one case, for now use TLB0 */
 
 


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 3/4] kvm: powerpc: define a linux pte lookup function

2013-10-10 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Paul Mackerras [mailto:pau...@samba.org]
 Sent: Thursday, October 10, 2013 4:06 PM
 To: Wood Scott-B07421
 Cc: Bhushan Bharat-R65777; Wood Scott-B07421; ag...@suse.de; Yoder Stuart-
 B08248; kvm@vger.kernel.org; kvm-...@vger.kernel.org
 Subject: Re: [PATCH 3/4] kvm: powerpc: define a linux pte lookup function
 
 On Wed, Oct 09, 2013 at 12:47:31PM -0500, Scott Wood wrote:
  On Wed, 2013-10-09 at 03:48 -0500, Bhushan Bharat-R65777 wrote:
  
   What lookup_linux_pte_and_update() does:-
- find_linux_pte_or_hugepte()
- does size and some other trivial checks
- Then atomically update the pte:-
  = while()
  = wait till _PAGE_BUSY is clear
  = atomically update the pte
  = if not updated then go back to while() above else break
  
  
   While what lookup_linux_pte() does:-
- find_linux_pte_or_hugepte()
- does size and some other trivial checks
- wait till _PAGE_BUSY is clear
- return pte
  
   I am finding it difficult to call lookup_linux_pte() from
 lookup_linux_pte_and_update().
 
  You could factor out a common lookup_linux_ptep().
 
 I don't really think it's enough code to be worth wringing out the last drop 
 of
 duplication.  However, if he removed the checks for _PAGE_BUSY and 
 _PAGE_PRESENT
 as I suggested in another mail, and made it return the pte pointer rather than
 the value, it would then essentially be a lookup_linux_ptep() as you suggest.

Do we want to have lookup_linux_pte() or  lookup_linux_ptep() or both where 
lookup_linux_pte() and lookup_linux_pte_and_update() calls lookup_linux_ptep() ?

-Bharat

 
 Paul.


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: RFC: (re-)binding the VFIO platform driver to a platform device

2013-10-10 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Thursday, October 10, 2013 8:53 PM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; Yoder Stuart-B08248; Kim Phillips; Christoffer Dall; 
 Alex
 Williamson; linux-ker...@vger.kernel.org; a.mota...@virtualopensystems.com;
 ag...@suse.de; Sethi Varun-B16395; peter.mayd...@linaro.org;
 santosh.shu...@linaro.org; kvm@vger.kernel.org; gre...@linuxfoundation.org
 Subject: Re: RFC: (re-)binding the VFIO platform driver to a platform device
 
 On Thu, 2013-10-10 at 02:45 -0500, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: Wood Scott-B07421
   Sent: Thursday, October 10, 2013 1:33 AM
   To: Yoder Stuart-B08248
   Cc: Wood Scott-B07421; Kim Phillips; Christoffer Dall; Alex
   Williamson; linux- ker...@vger.kernel.org;
   a.mota...@virtualopensystems.com; ag...@suse.de; Sethi Varun-B16395;
   Bhushan Bharat-R65777; peter.mayd...@linaro.org;
   santosh.shu...@linaro.org; kvm@vger.kernel.org;
   gre...@linuxfoundation.org
   Subject: Re: RFC: (re-)binding the VFIO platform driver to a
   platform device
  
   On Wed, 2013-10-09 at 14:44 -0500, Yoder Stuart-B08248 wrote:
Ah, think I understand now...yes that works as well, and would be
less intrusive.   So are you writing a patch? :)
  
   I've been meaning to since the previous round of discussion, but I've been
 busy.
   Would someone else be able to test it in the context of using it for VFIO?
 
  I wish I could have but I do not have vfio-platform stuff.
 
 VFIO PCI without new_id would also be a useful test.

I will do that :)

-Bharat

 
 -Scott
 



RE: [PATCH 2/2] kvm: ppc: booke: check range page invalidation progress on page setup

2013-10-10 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Paolo Bonzini [mailto:paolo.bonz...@gmail.com] On Behalf Of Paolo 
 Bonzini
 Sent: Monday, October 07, 2013 5:35 PM
 To: Alexander Graf
 Cc: Bhushan Bharat-R65777; Paul Mackerras; Wood Scott-B07421; kvm-
 p...@vger.kernel.org; k...@vger.kernel.org mailing list; Bhushan 
 Bharat-R65777;
 Gleb Natapov
 Subject: Re: [PATCH 2/2] kvm: ppc: booke: check range page invalidation 
 progress
 on page setup
 
 Il 04/10/2013 15:38, Alexander Graf ha scritto:
 
  On 07.08.2013, at 12:03, Bharat Bhushan wrote:
 
  When the MM code is invalidating a range of pages, it calls the KVM
  kvm_mmu_notifier_invalidate_range_start() notifier function, which calls
  kvm_unmap_hva_range(), which arranges to flush all the TLBs for guest 
  pages.
  However, the Linux PTEs for the range being flushed are still valid at
  that point.  We are not supposed to establish any new references to pages
  in the range until the ...range_end() notifier gets called.
  The PPC-specific KVM code doesn't get any explicit notification of that;
  instead, we are supposed to use mmu_notifier_retry() to test whether we
  are or have been inside a range flush notifier pair while we have been
  referencing a page.
 
  This patch calls the mmu_notifier_retry() while mapping the guest
  page to ensure we are not referencing a page when in range invalidation.
 
  This call is inside a region locked with kvm->mmu_lock, which is the
  same lock that is called by the KVM MMU notifier functions, thus
  ensuring that no new notification can proceed while we are in the
  locked region.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 
  Acked-by: Alexander Graf ag...@suse.de
 
  Gleb, Paolo, please queue for 3.12 directly.
 
 Here is the backport.  The second hunk has a nontrivial conflict, so
 someone please give their {Tested,Reviewed,Compiled}-by.

{Compiled,Reviewed}-by: Bharat Bhushan bharat.bhus...@freescale.com

Thanks
-Bharat

 
 Paolo
 
 diff --git a/arch/powerpc/kvm/e500_mmu_host.c 
 b/arch/powerpc/kvm/e500_mmu_host.c
 index 1c6a9d7..c65593a 100644
 --- a/arch/powerpc/kvm/e500_mmu_host.c
 +++ b/arch/powerpc/kvm/e500_mmu_host.c
 @@ -332,6 +332,13 @@ static inline int kvmppc_e500_shadow_map(struct
 kvmppc_vcpu_e500 *vcpu_e500,
   unsigned long hva;
   int pfnmap = 0;
   int tsize = BOOK3E_PAGESZ_4K;
 + int ret = 0;
 + unsigned long mmu_seq;
 + struct kvm *kvm = vcpu_e500->vcpu.kvm;
 +
 + /* used to check for invalidations in progress */
 + mmu_seq = kvm->mmu_notifier_seq;
 + smp_rmb();
 
   /*
* Translate guest physical to true physical, acquiring
 @@ -449,6 +456,12 @@ static inline int kvmppc_e500_shadow_map(struct
 kvmppc_vcpu_e500 *vcpu_e500,
   gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
   }
 
 + spin_lock(&kvm->mmu_lock);
 + if (mmu_notifier_retry(kvm, mmu_seq)) {
 + ret = -EAGAIN;
 + goto out;
 + }
 +
   kvmppc_e500_ref_setup(ref, gtlbe, pfn);
 
   kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
 @@ -457,10 +470,13 @@ static inline int kvmppc_e500_shadow_map(struct
 kvmppc_vcpu_e500 *vcpu_e500,
   /* Clear i-cache for new pages */
   kvmppc_mmu_flush_icache(pfn);
 
 +out:
 + spin_unlock(&kvm->mmu_lock);
 +
   /* Drop refcount on page, so that mmu notifiers can clear it */
   kvm_release_pfn_clean(pfn);
 
 - return 0;
 + return ret;
  }
 
  /* XXX only map the one-one case, for now use TLB0 */
 
 


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 3/4] kvm: powerpc: define a linux pte lookup function

2013-10-10 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Paul Mackerras [mailto:pau...@samba.org]
 Sent: Thursday, October 10, 2013 4:06 PM
 To: Wood Scott-B07421
 Cc: Bhushan Bharat-R65777; Wood Scott-B07421; ag...@suse.de; Yoder Stuart-
 B08248; k...@vger.kernel.org; kvm-ppc@vger.kernel.org
 Subject: Re: [PATCH 3/4] kvm: powerpc: define a linux pte lookup function
 
 On Wed, Oct 09, 2013 at 12:47:31PM -0500, Scott Wood wrote:
  On Wed, 2013-10-09 at 03:48 -0500, Bhushan Bharat-R65777 wrote:
  
   What lookup_linux_pte_and_update() does:-
- find_linux_pte_or_hugepte()
- does size and some other trivial checks
- Then atomically update the pte:-
  = while()
  = wait till _PAGE_BUSY is clear
  = atomically update the pte
  = if not updated then go back to while() above else break
  
  
   While what lookup_linux_pte() does:-
- find_linux_pte_or_hugepte()
- does size and some other trivial checks
- wait till _PAGE_BUSY is clear
- return pte
  
   I am finding it difficult to call lookup_linux_pte() from
 lookup_linux_pte_and_update().
 
  You could factor out a common lookup_linux_ptep().
 
 I don't really think it's enough code to be worth wringing out the last drop 
 of
 duplication.  However, if he removed the checks for _PAGE_BUSY and 
 _PAGE_PRESENT
 as I suggested in another mail, and made it return the pte pointer rather than
 the value, it would then essentially be a lookup_linux_ptep() as you suggest.

Do we want to have lookup_linux_pte() or  lookup_linux_ptep() or both where 
lookup_linux_pte() and lookup_linux_pte_and_update() calls lookup_linux_ptep() ?

-Bharat

 
 Paul.


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 3/4] kvm: powerpc: define a linux pte lookup function

2013-10-09 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Wednesday, October 09, 2013 3:07 AM
 To: Bhushan Bharat-R65777
 Cc: ag...@suse.de; Yoder Stuart-B08248; kvm@vger.kernel.org; kvm-
 p...@vger.kernel.org; pau...@samba.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 3/4] kvm: powerpc: define a linux pte lookup function
 
 On Tue, 2013-10-08 at 11:33 +0530, Bharat Bhushan wrote:
  We need to search linux pte to get pte attributes for setting TLB
  in KVM.
  This patch defines a linux_pte_lookup() function for same.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
   arch/powerpc/include/asm/pgtable.h |   35 
  +++
   1 files changed, 35 insertions(+), 0 deletions(-)
 
  diff --git a/arch/powerpc/include/asm/pgtable.h
  b/arch/powerpc/include/asm/pgtable.h
  index 7d6eacf..fd26c04 100644
  --- a/arch/powerpc/include/asm/pgtable.h
  +++ b/arch/powerpc/include/asm/pgtable.h
  @@ -223,6 +223,41 @@ extern int gup_hugepte(pte_t *ptep, unsigned long
  sz, unsigned long addr,  #endif  pte_t
  *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
   unsigned *shift);
  +
  +static inline pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
  +unsigned long *pte_sizep)
  +{
  +   pte_t *ptep;
  +   pte_t pte;
  +   unsigned long ps = *pte_sizep;
  +   unsigned int shift;
  +
  +   ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
  +   if (!ptep)
  +   return __pte(0);
  +   if (shift)
  +   *pte_sizep = 1ul << shift;
  +   else
  +   *pte_sizep = PAGE_SIZE;
  +
  +   if (ps > *pte_sizep)
  +   return __pte(0);
  +
  +   /* wait until _PAGE_BUSY is clear */
  +   while (1) {
  +   pte = pte_val(*ptep);
  +   if (unlikely(pte & _PAGE_BUSY)) {
  +   cpu_relax();
  +   continue;
  +   }
  +   }
  +
  +   /* If pte is not present return None */
  +   if (unlikely(!(pte & _PAGE_PRESENT)))
  +   return __pte(0);
  +
  +   return pte;
  +}
 
 Can lookup_linux_pte_and_update() call lookup_linux_pte()?

What lookup_linux_pte_and_update() does:-
 - find_linux_pte_or_hugepte()
 - does size and some other trivial checks
 - Then atomically update the pte:-
   = while()
   = wait till _PAGE_BUSY is clear
   = atomically update the pte
   = if not updated then go back to while() above else break


While what lookup_linux_pte() does:-
 - find_linux_pte_or_hugepte()
 - does size and some other trivial checks
 - wait till _PAGE_BUSY is clear
 - return pte

I am finding it difficult to call lookup_linux_pte() from 
lookup_linux_pte_and_update().

Thanks
-Bharat

 
 -Scott
 



RE: [PATCH 3/4] kvm: powerpc: define a linux pte lookup function

2013-10-09 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Wednesday, October 09, 2013 3:07 AM
 To: Bhushan Bharat-R65777
 Cc: ag...@suse.de; Yoder Stuart-B08248; k...@vger.kernel.org; kvm-
 p...@vger.kernel.org; pau...@samba.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 3/4] kvm: powerpc: define a linux pte lookup function
 
 On Tue, 2013-10-08 at 11:33 +0530, Bharat Bhushan wrote:
  We need to search linux pte to get pte attributes for setting TLB
  in KVM.
  This patch defines a linux_pte_lookup() function for same.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
   arch/powerpc/include/asm/pgtable.h |   35 
  +++
   1 files changed, 35 insertions(+), 0 deletions(-)
 
  diff --git a/arch/powerpc/include/asm/pgtable.h
  b/arch/powerpc/include/asm/pgtable.h
  index 7d6eacf..fd26c04 100644
  --- a/arch/powerpc/include/asm/pgtable.h
  +++ b/arch/powerpc/include/asm/pgtable.h
  @@ -223,6 +223,41 @@ extern int gup_hugepte(pte_t *ptep, unsigned long
  sz, unsigned long addr,  #endif  pte_t
  *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
   unsigned *shift);
  +
  +static inline pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
  +unsigned long *pte_sizep)
  +{
  +   pte_t *ptep;
  +   pte_t pte;
  +   unsigned long ps = *pte_sizep;
  +   unsigned int shift;
  +
  +   ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
  +   if (!ptep)
  +   return __pte(0);
  +   if (shift)
  +   *pte_sizep = 1ul << shift;
  +   else
  +   *pte_sizep = PAGE_SIZE;
  +
  +   if (ps > *pte_sizep)
  +   return __pte(0);
  +
  +   /* wait until _PAGE_BUSY is clear */
  +   while (1) {
  +   pte = pte_val(*ptep);
  +   if (unlikely(pte & _PAGE_BUSY)) {
  +   cpu_relax();
  +   continue;
  +   }
  +   }
  +
  +   /* If pte is not present return None */
  +   if (unlikely(!(pte & _PAGE_PRESENT)))
  +   return __pte(0);
  +
  +   return pte;
  +}
 
 Can lookup_linux_pte_and_update() call lookup_linux_pte()?

What lookup_linux_pte_and_update() does:-
 - find_linux_pte_or_hugepte()
 - does size and some other trivial checks
 - Then atomically update the pte:-
   = while()
   = wait till _PAGE_BUSY is clear
   = atomically update the pte
   = if not updated then go back to while() above else break


While what lookup_linux_pte() does:-
 - find_linux_pte_or_hugepte()
 - does size and some other trivial checks
 - wait till _PAGE_BUSY is clear
 - return pte

I am finding it difficult to call lookup_linux_pte() from 
lookup_linux_pte_and_update().

Thanks
-Bharat

 
 -Scott
 



RE: [PATCH 1/7] powerpc: Add interface to get msi region information

2013-10-08 Thread Bhushan Bharat-R65777


> -Original Message-
> From: Wood Scott-B07421
> Sent: Wednesday, October 09, 2013 4:27 AM
> To: Bhushan Bharat-R65777
> Cc: alex.william...@redhat.com; j...@8bytes.org; b...@kernel.crashing.org;
> ga...@kernel.crashing.org; linux-kernel@vger.kernel.org; linuxppc-
> d...@lists.ozlabs.org; linux-...@vger.kernel.org; ag...@suse.de;
> io...@lists.linux-foundation.org; Bhushan Bharat-R65777
> Subject: Re: [PATCH 1/7] powerpc: Add interface to get msi region information
> 
> On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > @@ -376,6 +405,7 @@ static int fsl_of_msi_probe(struct platform_device *dev)
> > int len;
> > u32 offset;
> > static const u32 all_avail[] = { 0, NR_MSI_IRQS };
> > +   static int bank_index;
> >
> > match = of_match_device(fsl_of_msi_ids, &dev->dev);
> > if (!match)
> > @@ -419,8 +449,8 @@ static int fsl_of_msi_probe(struct platform_device *dev)
> > dev->dev.of_node->full_name);
> > goto error_out;
> > }
> > -   msi->msiir_offset =
> > -   features->msiir_offset + (res.start & 0xf);
> > +   msi->msiir = res.start + features->msiir_offset;
> > +   printk("msi->msiir = %llx\n", msi->msiir);
> 
> dev_dbg or remove

Oops, sorry it was leftover of debugging :(

> 
> > }
> >
> > msi->feature = features->fsl_pic_ip; @@ -470,6 +500,7 @@ static int
> > fsl_of_msi_probe(struct platform_device *dev)
> > }
> > }
> >
> > +   msi->bank_index = bank_index++;
> 
> What if multiple MSIs are being probed in parallel?

Ohh, I have not thought that it can be called in parallel

>  bank_index is not atomic.

Will declare bank_index as atomic_t and use atomic_inc_return(&bank_index)

> 
> > diff --git a/arch/powerpc/sysdev/fsl_msi.h
> > b/arch/powerpc/sysdev/fsl_msi.h index 8225f86..6bd5cfc 100644
> > --- a/arch/powerpc/sysdev/fsl_msi.h
> > +++ b/arch/powerpc/sysdev/fsl_msi.h
> > @@ -29,12 +29,19 @@ struct fsl_msi {
> > struct irq_domain *irqhost;
> >
> > unsigned long cascade_irq;
> > -
> > -   u32 msiir_offset; /* Offset of MSIIR, relative to start of CCSR */
> > +   dma_addr_t msiir; /* MSIIR Address in CCSR */
> 
> Are you sure dma_addr_t is right here, versus phys_addr_t?  It implies that 
> it's
> the output of the DMA API, but I don't think the DMA API is used in the MSI
> driver.  Perhaps it should be, but we still want the raw physical address to
> pass on to VFIO.

Looking through the conversation I will make this phys_addr_t

> 
> > void __iomem *msi_regs;
> > u32 feature;
> > int msi_virqs[NR_MSI_REG];
> >
> > +   /*
> > +* During probe each bank is assigned a index number.
> > +* index number ranges from 0 to 2^32.
> > +* Example  MSI bank 1 = 0
> > +* MSI bank 2 = 1, and so on.
> > +*/
> > +   int bank_index;
> 
> 2^32 doesn't fit in "int" (nor does 2^32 - 1).

Right :(

> 
> Just say that indices start at 0.

Will correct this

Thanks
-Bharat

> 
> -Scott
> 



RE: [PATCH 1/7] powerpc: Add interface to get msi region information

2013-10-08 Thread Bhushan Bharat-R65777


> -Original Message-
> From: j...@8bytes.org [mailto:j...@8bytes.org]
> Sent: Tuesday, October 08, 2013 10:32 PM
> To: Bjorn Helgaas
> Cc: Bhushan Bharat-R65777; alex.william...@redhat.com; 
> b...@kernel.crashing.org;
> ga...@kernel.crashing.org; linux-kernel@vger.kernel.org; linuxppc-
> d...@lists.ozlabs.org; linux-...@vger.kernel.org; ag...@suse.de; Wood Scott-
> B07421; io...@lists.linux-foundation.org
> Subject: Re: [PATCH 1/7] powerpc: Add interface to get msi region information
> 
> On Tue, Oct 08, 2013 at 10:47:49AM -0600, Bjorn Helgaas wrote:
> > I still have no idea what an "aperture type IOMMU" is, other than that
> > it is "different."
> 
> An aperture based IOMMU is basically any GART-like IOMMU which can only remap 
> a
> small window (the aperture) of the DMA address space. DMA outside of that 
> window
> is either blocked completely or passed through untranslated.

It is completely blocked for Freescale PAMU.
So for this type of IOMMU we have to create an MSI mapping just after the
guest physical address range. For example, if a guest has 512M of memory, we
create a window of 1G (because of the power-of-2 requirement) and then have to
fit the MSI mapping just after the guest's 512M.
For that we need:
1) To know the physical address of the MSI registers in the interrupt
controller (which is what this patch is all about).

2) To write the MSI address and MSI data into the device when the guest
enables MSI interrupts. The discussion with Alex Williamson is about that interface.

Thanks
-Bharat

> 
> 
>   Joerg
> 
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH 1/7] powerpc: Add interface to get msi region information

2013-10-08 Thread Bhushan Bharat-R65777


 -Original Message-
 From: j...@8bytes.org [mailto:j...@8bytes.org]
 Sent: Tuesday, October 08, 2013 10:32 PM
 To: Bjorn Helgaas
 Cc: Bhushan Bharat-R65777; alex.william...@redhat.com; 
 b...@kernel.crashing.org;
 ga...@kernel.crashing.org; linux-kernel@vger.kernel.org; linuxppc-
 d...@lists.ozlabs.org; linux-...@vger.kernel.org; ag...@suse.de; Wood Scott-
 B07421; io...@lists.linux-foundation.org
 Subject: Re: [PATCH 1/7] powerpc: Add interface to get msi region information
 
 On Tue, Oct 08, 2013 at 10:47:49AM -0600, Bjorn Helgaas wrote:
  I still have no idea what an "aperture type IOMMU" is, other than that
  it is "different."
 
 An aperture based IOMMU is basically any GART-like IOMMU which can only remap 
 a
 small window (the aperture) of the DMA address space. DMA outside of that 
 window
 is either blocked completely or passed through untranslated.

It is completely blocked for Freescale PAMU. 
So for this type of IOMMU, what we have to do is create an MSI mapping just 
after the guest physical address space. Example: if the guest has 512M of memory, 
we create a window of 1G (because of the power-of-2 requirement), and then we 
have to fit the MSI mapping just after the guest's 512M.
And for that we need
1) to know the physical address of the MSIs in the interrupt controller (that 
is what this patch is all about).

2) When the guest enables an MSI interrupt, we write the MSI address and 
MSI data into the device. The discussion with Alex Williamson is about that interface.

Thanks
-Bharat

 
 
   Joerg
 
 


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH 1/7] powerpc: Add interface to get msi region information

2013-10-08 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Wednesday, October 09, 2013 4:27 AM
 To: Bhushan Bharat-R65777
 Cc: alex.william...@redhat.com; j...@8bytes.org; b...@kernel.crashing.org;
 ga...@kernel.crashing.org; linux-kernel@vger.kernel.org; linuxppc-
 d...@lists.ozlabs.org; linux-...@vger.kernel.org; ag...@suse.de;
 io...@lists.linux-foundation.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 1/7] powerpc: Add interface to get msi region information
 
 On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
  @@ -376,6 +405,7 @@ static int fsl_of_msi_probe(struct platform_device *dev)
  int len;
  u32 offset;
  static const u32 all_avail[] = { 0, NR_MSI_IRQS };
  +   static int bank_index;
 
  match = of_match_device(fsl_of_msi_ids, dev-dev);
  if (!match)
  @@ -419,8 +449,8 @@ static int fsl_of_msi_probe(struct platform_device *dev)
  dev-dev.of_node-full_name);
  goto error_out;
  }
  -   msi-msiir_offset =
  -   features-msiir_offset + (res.start  0xf);
  +   msi-msiir = res.start + features-msiir_offset;
  +   printk(msi-msiir = %llx\n, msi-msiir);
 
 dev_dbg or remove

Oops, sorry it was leftover of debugging :(

 
  }
 
  msi-feature = features-fsl_pic_ip; @@ -470,6 +500,7 @@ static int
  fsl_of_msi_probe(struct platform_device *dev)
  }
  }
 
  +   msi-bank_index = bank_index++;
 
 What if multiple MSIs are being probed in parallel?

Ohh, I have not thought that it can be called in parallel

  bank_index is not atomic.

Will declare bank_index as atomic_t and use atomic_inc_return(&bank_index)

 
  diff --git a/arch/powerpc/sysdev/fsl_msi.h
  b/arch/powerpc/sysdev/fsl_msi.h index 8225f86..6bd5cfc 100644
  --- a/arch/powerpc/sysdev/fsl_msi.h
  +++ b/arch/powerpc/sysdev/fsl_msi.h
  @@ -29,12 +29,19 @@ struct fsl_msi {
  struct irq_domain *irqhost;
 
  unsigned long cascade_irq;
  -
  -   u32 msiir_offset; /* Offset of MSIIR, relative to start of CCSR */
  +   dma_addr_t msiir; /* MSIIR Address in CCSR */
 
 Are you sure dma_addr_t is right here, versus phys_addr_t?  It implies that 
 it's
 the output of the DMA API, but I don't think the DMA API is used in the MSI
 driver.  Perhaps it should be, but we still want the raw physical address to
 pass on to VFIO.

Looking through the conversation I will make this phys_addr_t

 
  void __iomem *msi_regs;
  u32 feature;
  int msi_virqs[NR_MSI_REG];
 
  +   /*
  +* During probe each bank is assigned a index number.
  +* index number ranges from 0 to 2^32.
  +* Example  MSI bank 1 = 0
  +* MSI bank 2 = 1, and so on.
  +*/
  +   int bank_index;
 
 2^32 doesn't fit in int (nor does 2^32 - 1).

Right :(

 
 Just say that indices start at 0.

Will correct this

Thanks
-Bharat

 
 -Scott
 



RE: [PATCH 1/7] powerpc: Add interface to get msi region information

2013-10-08 Thread Bhushan Bharat-R65777


 -Original Message-
 From: j...@8bytes.org [mailto:j...@8bytes.org]
 Sent: Tuesday, October 08, 2013 10:32 PM
 To: Bjorn Helgaas
 Cc: Bhushan Bharat-R65777; alex.william...@redhat.com; 
 b...@kernel.crashing.org;
 ga...@kernel.crashing.org; linux-ker...@vger.kernel.org; linuxppc-
 d...@lists.ozlabs.org; linux-...@vger.kernel.org; ag...@suse.de; Wood Scott-
 B07421; io...@lists.linux-foundation.org
 Subject: Re: [PATCH 1/7] powerpc: Add interface to get msi region information
 
 On Tue, Oct 08, 2013 at 10:47:49AM -0600, Bjorn Helgaas wrote:
  I still have no idea what an aperture type IOMMU is, other than that
  it is different.
 
 An aperture based IOMMU is basically any GART-like IOMMU which can only remap 
 a
 small window (the aperture) of the DMA address space. DMA outside of that 
 window
 is either blocked completely or passed through untranslated.

It is completely blocked for Freescale PAMU. 
So for this type of IOMMU, what we have to do is create an MSI mapping just 
after the guest physical address space. Example: if the guest has 512M of memory, 
we create a window of 1G (because of the power-of-2 requirement), and then we 
have to fit the MSI mapping just after the guest's 512M.
And for that we need
1) to know the physical address of the MSIs in the interrupt controller (that 
is what this patch is all about).

2) When the guest enables an MSI interrupt, we write the MSI address and 
MSI data into the device. The discussion with Alex Williamson is about that interface.

Thanks
-Bharat

 
 
   Joerg
 
 


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH 1/7] powerpc: Add interface to get msi region information

2013-10-08 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Wood Scott-B07421
 Sent: Wednesday, October 09, 2013 4:27 AM
 To: Bhushan Bharat-R65777
 Cc: alex.william...@redhat.com; j...@8bytes.org; b...@kernel.crashing.org;
 ga...@kernel.crashing.org; linux-ker...@vger.kernel.org; linuxppc-
 d...@lists.ozlabs.org; linux-...@vger.kernel.org; ag...@suse.de;
 io...@lists.linux-foundation.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 1/7] powerpc: Add interface to get msi region information
 
 On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
  @@ -376,6 +405,7 @@ static int fsl_of_msi_probe(struct platform_device *dev)
  int len;
  u32 offset;
  static const u32 all_avail[] = { 0, NR_MSI_IRQS };
  +   static int bank_index;
 
  match = of_match_device(fsl_of_msi_ids, dev-dev);
  if (!match)
  @@ -419,8 +449,8 @@ static int fsl_of_msi_probe(struct platform_device *dev)
  dev-dev.of_node-full_name);
  goto error_out;
  }
  -   msi-msiir_offset =
  -   features-msiir_offset + (res.start  0xf);
  +   msi-msiir = res.start + features-msiir_offset;
  +   printk(msi-msiir = %llx\n, msi-msiir);
 
 dev_dbg or remove

Oops, sorry it was leftover of debugging :(

 
  }
 
  msi-feature = features-fsl_pic_ip; @@ -470,6 +500,7 @@ static int
  fsl_of_msi_probe(struct platform_device *dev)
  }
  }
 
  +   msi-bank_index = bank_index++;
 
 What if multiple MSIs are being probed in parallel?

Ohh, I have not thought that it can be called in parallel

  bank_index is not atomic.

Will declare bank_index as atomic_t and use atomic_inc_return(&bank_index)

 
  diff --git a/arch/powerpc/sysdev/fsl_msi.h
  b/arch/powerpc/sysdev/fsl_msi.h index 8225f86..6bd5cfc 100644
  --- a/arch/powerpc/sysdev/fsl_msi.h
  +++ b/arch/powerpc/sysdev/fsl_msi.h
  @@ -29,12 +29,19 @@ struct fsl_msi {
  struct irq_domain *irqhost;
 
  unsigned long cascade_irq;
  -
  -   u32 msiir_offset; /* Offset of MSIIR, relative to start of CCSR */
  +   dma_addr_t msiir; /* MSIIR Address in CCSR */
 
 Are you sure dma_addr_t is right here, versus phys_addr_t?  It implies that 
 it's
 the output of the DMA API, but I don't think the DMA API is used in the MSI
 driver.  Perhaps it should be, but we still want the raw physical address to
 pass on to VFIO.

Looking through the conversation I will make this phys_addr_t

 
  void __iomem *msi_regs;
  u32 feature;
  int msi_virqs[NR_MSI_REG];
 
  +   /*
  +* During probe each bank is assigned a index number.
  +* index number ranges from 0 to 2^32.
  +* Example  MSI bank 1 = 0
  +* MSI bank 2 = 1, and so on.
  +*/
  +   int bank_index;
 
 2^32 doesn't fit in int (nor does 2^32 - 1).

Right :(

 
 Just say that indices start at 0.

Will correct this

Thanks
-Bharat

 
 -Scott
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH 1/4] powerpc: Added __cmpdi2 for signed 64bit comparision

2013-10-08 Thread Bhushan Bharat-R65777
Oops it came as 1/4,
I am sorry, please ignore this

Thanks
-Bharat

 -Original Message-
 From: Bhushan Bharat-R65777
 Sent: Wednesday, October 09, 2013 10:39 AM
 To: Wood Scott-B07421; linuxppc-dev@lists.ozlabs.org; b...@kernel.crashing.org
 Cc: Bhushan Bharat-R65777; Bhushan Bharat-R65777
 Subject: [PATCH 1/4] powerpc: Added __cmpdi2 for signed 64bit comparision
 
 This was missing on powerpc and I am getting compilation error
 drivers/vfio/pci/vfio_pci_rdwr.c:193: undefined reference to `__cmpdi2'
 drivers/vfio/pci/vfio_pci_rdwr.c:193: undefined reference to `__cmpdi2'
 
 Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 ---
  arch/powerpc/kernel/misc_32.S   |   14 ++
  arch/powerpc/kernel/ppc_ksyms.c |2 ++
  2 files changed, 16 insertions(+), 0 deletions(-)
 
 diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S 
 index
 777d999..7c0eec2 100644
 --- a/arch/powerpc/kernel/misc_32.S
 +++ b/arch/powerpc/kernel/misc_32.S
 @@ -644,6 +644,20 @@ _GLOBAL(__lshrdi3)
   blr
 
  /*
 + * 64-bit comparison: __cmpdi2(s64 a, s64 b)
 + * Returns 0 if a < b, 1 if a == b, 2 if a > b.
 + */
 +_GLOBAL(__cmpdi2)
 + cmpwr3,r5
 + li  r3,1
 + bne 1f
 + cmplw   r4,r6
 + beqlr
 +1:   li  r3,0
 + bltlr
 + li  r3,2
 + blr
 +/*
   * 64-bit comparison: __ucmpdi2(u64 a, u64 b)
   * Returns 0 if a < b, 1 if a == b, 2 if a > b.
   */
 diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
 index 21646db..5674c00 100644
 --- a/arch/powerpc/kernel/ppc_ksyms.c
 +++ b/arch/powerpc/kernel/ppc_ksyms.c
 @@ -143,6 +143,8 @@ EXPORT_SYMBOL(__ashldi3);  EXPORT_SYMBOL(__lshrdi3);  int
 __ucmpdi2(unsigned long long, unsigned long long);  EXPORT_SYMBOL(__ucmpdi2);
 +int __cmpdi2(long long, long long);
 +EXPORT_SYMBOL(__cmpdi2);
  #endif
  long long __bswapdi2(long long);
  EXPORT_SYMBOL(__bswapdi2);
 --
 1.7.0.4


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-07 Thread Bhushan Bharat-R65777
> > > Do you really want module dependencies between vfio and your core
> > > kernel MSI setup?  Look at the vfio external user interface that we've
> already defined.
> > > That allows other components of the kernel to get a proper reference
> > > to a vfio group.  From there you can work out how to get what you
> > > want.  Another alternative is that vfio could register an MSI to
> > > IOVA mapping with architecture code when the mapping is created.
> > > The MSI setup path could then do a lookup in architecture code for
> > > the mapping.  You could even store the MSI to IOVA mapping in VFIO
> > > and create an interface where SET_IRQ passes that mapping into setup code.
> >
> > Ok, What I want is to get IOVA associated with a physical address
> > (physical address of MSI-bank).
> > And currently I do not see a way to know IOVA of a physical address
> > and doing all this domain get and then search through all of
> > iommu-windows of that domain.
> >
> > What if we add an iommu-API which can return the IOVA mapping of a
> > physical address. Current use case is setting up MSI's for aperture
> > type of IOMMU also getting a phys_to_iova() mapping is independent of
> > VFIO, your thought?
> 
> A physical address can be mapped to multiple IOVAs, so the interface seems
> flawed by design.  It also has the same problem as above, it's a backdoor that
> can be called asynchronous to the owner of the domain, so what reason is there
> to believe the result?  It just replaces an iommu_domain pointer with an IOVA.
> VFIO knows this mapping, so why are we trying to go behind its back and ask 
> the
> IOMMU?
The IOMMU is the final place where the mapping is created, so maybe today it is 
called on behalf of VFIO, but tomorrow it could be for normal Linux or some other 
interface. But I am fine with talking to VFIO directly and will not try to solve a 
problem which does not exist today.

The MSI subsystem knows the pdev (PCI device) and the physical address; what 
interface will it use to get the IOVA from VFIO?

Thanks
-Bharat

>  Thanks,
> 
> Alex
> 

N�r��yb�X��ǧv�^�)޺{.n�+{zX����ܨ}���Ơz�:+v���zZ+��+zf���h���~i���z��w���?�&�)ߢf��^jǫy�m��@A�a���
0��h���i

RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-07 Thread Bhushan Bharat-R65777
   Do you really want module dependencies between vfio and your core
   kernel MSI setup?  Look at the vfio external user interface that we've
 already defined.
   That allows other components of the kernel to get a proper reference
   to a vfio group.  From there you can work out how to get what you
   want.  Another alternative is that vfio could register an MSI to
   IOVA mapping with architecture code when the mapping is created.
   The MSI setup path could then do a lookup in architecture code for
   the mapping.  You could even store the MSI to IOVA mapping in VFIO
   and create an interface where SET_IRQ passes that mapping into setup code.
 
  Ok, What I want is to get IOVA associated with a physical address
  (physical address of MSI-bank).
  And currently I do not see a way to know IOVA of a physical address
  and doing all this domain get and then search through all of
  iommu-windows of that domain.
 
  What if we add an iommu-API which can return the IOVA mapping of a
  physical address. Current use case is setting up MSI's for aperture
  type of IOMMU also getting a phys_to_iova() mapping is independent of
  VFIO, your thought?
 
 A physical address can be mapped to multiple IOVAs, so the interface seems
 flawed by design.  It also has the same problem as above, it's a backdoor that
 can be called asynchronous to the owner of the domain, so what reason is there
 to believe the result?  It just replaces an iommu_domain pointer with an IOVA.
 VFIO knows this mapping, so why are we trying to go behind its back and ask 
 the
 IOMMU?
The IOMMU is the final place where the mapping is created, so maybe today it is 
called on behalf of VFIO, but tomorrow it could be for normal Linux or some other 
interface. But I am fine with talking to VFIO directly and will not try to solve a 
problem which does not exist today.

The MSI subsystem knows the pdev (PCI device) and the physical address; what 
interface will it use to get the IOVA from VFIO?

Thanks
-Bharat

  Thanks,
 
 Alex
 

N�r��yb�X��ǧv�^�)޺{.n�+{zX����ܨ}���Ơz�j:+v���zZ+��+zf���h���~i���z��w���?��)ߢf��^jǫy�m��@A�a���
0��h���i

RE: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to epapr_hypercall()

2013-10-07 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Friday, October 04, 2013 4:46 PM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; kvm-...@vger.kernel.org; kvm@vger.kernel.org
 Subject: Re: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to
 epapr_hypercall()
 
 
 On 04.10.2013, at 06:26, Bhushan Bharat-R65777 wrote:
 
 
 
  -Original Message-
  From: Wood Scott-B07421
  Sent: Thursday, October 03, 2013 12:04 AM
  To: Alexander Graf
  Cc: Bhushan Bharat-R65777; kvm-...@vger.kernel.org;
  kvm@vger.kernel.org; Bhushan
  Bharat-R65777
  Subject: Re: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to
  epapr_hypercall()
 
  On Wed, 2013-10-02 at 19:54 +0200, Alexander Graf wrote:
  On 02.10.2013, at 19:49, Scott Wood wrote:
 
  On Wed, 2013-10-02 at 19:46 +0200, Alexander Graf wrote:
  On 02.10.2013, at 19:42, Scott Wood wrote:
 
  On Wed, 2013-10-02 at 19:17 +0200, Alexander Graf wrote:
  On 02.10.2013, at 19:04, Scott Wood wrote:
 
  On Wed, 2013-10-02 at 18:53 +0200, Alexander Graf wrote:
  On 02.10.2013, at 18:40, Scott Wood wrote:
 
  On Wed, 2013-10-02 at 16:19 +0200, Alexander Graf wrote:
  Won't this break when CONFIG_EPAPR_PARAVIRT=n? We wouldn't
  have
  epapr_hcalls.S compiled into the code base then and the bl above
  would reference an unknown function.
 
  KVM_GUEST selects EPAPR_PARAVIRT.
 
  But you can not select KVM_GUEST and still call these inline
  functions,
  no?
 
  No.
 
  Like kvm_arch_para_features().
 
  Where does that get called without KVM_GUEST?
 
  How would that work currently, with the call to kvm_hypercall()
  in arch/powerpc/kernel/kvm.c (which calls epapr_hypercall, BTW)?
 
  It wouldn't ever get called because kvm_hypercall() ends up
  always
  returning EV_UNIMPLEMENTED when #ifndef CONFIG_KVM_GUEST.
 
  OK, so the objection is to removing that stub?  Where would we
  actually want to call this without knowing that KVM_GUEST or
  EPAPR_PARAVIRT are enabled?
 
  In probing code. I usually prefer
 
  if (kvm_feature_available(X)) {
   ...
  }
 
  over
 
  #ifdef CONFIG_KVM_GUEST
  if (kvm_feature_available(X)) {
   ...
  }
  #endif
 
  at least when I can avoid it. With the current code the compiler
  would be
  smart enough to just optimize out the complete branch.
 
  Sure.  My point is, where would you be calling that where the
  entire file isn't predicated on (or selecting) CONFIG_KVM_GUEST or 
  similar?
 
  We don't do these stubs for every single function in the kernel --
  only ones where the above is a reasonable use case.
 
  Yeah, I'm fine on dropping it, but we need to make that a conscious
  decision
  and verify that no caller relies on it.
 
  kvm_para_has_feature() is called from arch/powerpc/kernel/kvm.c,
  arch/x86/kernel/kvm.c, and arch/x86/kernel/kvmclock.c, all of which
  are enabled by CONFIG_KVM_GUEST.
 
  I did find one example of kvm_para_available() being used in an
  unexpected place
  -- sound/pci/intel8x0.c.  It defines its own non-CONFIG_KVM_GUEST
  stub, even though x86 defines kvm_para_available() using inline CPUID
  stuff which should work without CONFIG_KVM_GUEST.
  I'm not sure why it even needs to do that, though -- shouldn't the
  subsequent PCI subsystem vendor/device check should be sufficient?
  No hypercalls are involved.
 
  That said, the possibility that some random driver might want to make
  use of paravirt features is a decent argument for keeping the stub.
 
 
  I am not sure where we are agreeing on?
  Do we want to remove the stub in arch/powerpc/include/asm/kvm_para.h ? as
 there is no caller without KVM_GUEST and in future caller ensure this to be
 called only from code selected by KVM_GUEST?
 
  Or let this stub stay to avoid any random driver calling this ?
 
 I think the most reasonable way forward is to add a stub for non-CONFIG_EPAPR 
 to
 the epapr code, then replace the kvm bits with generic epapr bits (which your
 patches already do).

Please describe which stub you are talking about.

Thanks
-Bharat

 
 With that we should be 100% equivalent to today's code, just with a lot less
 lines of code :).
 
 
 Alex
 


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to epapr_hypercall()

2013-10-07 Thread Bhushan Bharat-R65777
  at least when I can avoid it. With the current code the compiler
  would be
  smart enough to just optimize out the complete branch.
 
  Sure.  My point is, where would you be calling that where the
  entire file isn't predicated on (or selecting) CONFIG_KVM_GUEST or
 similar?
 
  We don't do these stubs for every single function in the kernel
  -- only ones where the above is a reasonable use case.
 
  Yeah, I'm fine on dropping it, but we need to make that a
  conscious decision
  and verify that no caller relies on it.
 
  kvm_para_has_feature() is called from arch/powerpc/kernel/kvm.c,
  arch/x86/kernel/kvm.c, and arch/x86/kernel/kvmclock.c, all of which
  are enabled by CONFIG_KVM_GUEST.
 
  I did find one example of kvm_para_available() being used in an
  unexpected place
  -- sound/pci/intel8x0.c.  It defines its own non-CONFIG_KVM_GUEST
  stub, even though x86 defines kvm_para_available() using inline
  CPUID stuff which should work without CONFIG_KVM_GUEST.
  I'm not sure why it even needs to do that, though -- shouldn't the
  subsequent PCI subsystem vendor/device check should be sufficient?
  No hypercalls are involved.
 
  That said, the possibility that some random driver might want to
  make use of paravirt features is a decent argument for keeping the stub.
 
 
  I am not sure where we are agreeing on?
  Do we want to remove the stub in arch/powerpc/include/asm/kvm_para.h
  ? as
  there is no caller without KVM_GUEST and in future caller ensure this
  to be called only from code selected by KVM_GUEST?
 
  Or let this stub stay to avoid any random driver calling this ?
 
  I think the most reasonable way forward is to add a stub for
  non-CONFIG_EPAPR to the epapr code, then replace the kvm bits with
  generic epapr bits (which your patches already do).
 
  Please describe which stub you are talking about.
 
 kvm_hypercall is always available, regardless of the config option, which 
 makes
 all its subfunctions always available as well.

This patch renames kvm_hypercall() to epapr_hypercall(), which is always 
available, and the kvm_hypercall() friends now call epapr_hypercall() directly.
IIUC, what you are trying to say is: let the kvm_hypercall() friends keep on 
calling kvm_hypercall() itself, and add a stub something like this:

#ifdef CONFIG_KVM_GUEST
 
static unsigned long kvm_hypercall(unsigned long *in,
unsigned long *out,
unsigned long nr)
{
return epapr_hypercall(in, out. nr);
}
 
 #else
static unsigned long kvm_hypercall(unsigned long *in,
unsigned long *out,
unsigned long nr) {
 return EV_UNIMPLEMENTED;
}
-

I am still not really convinced about why we want to keep this stub where we 
know this is not called outside KVM_GUEST and calling this without KVM_GUEST is 
debatable.

Thanks
-Bharat

Thanks
-Bharat

 
 
 Alex
 
 ---
 
 #ifdef CONFIG_KVM_GUEST
 
 #include linux/of.h
 
 static inline int kvm_para_available(void) {
 struct device_node *hyper_node;
 
 hyper_node = of_find_node_by_path(/hypervisor);
 if (!hyper_node)
 return 0;
 
 if (!of_device_is_compatible(hyper_node, linux,kvm))
 return 0;
 
 return 1;
 }
 
 extern unsigned long kvm_hypercall(unsigned long *in,
unsigned long *out,
unsigned long nr);
 
 #else
 
 static inline int kvm_para_available(void) {
 return 0;
 }
 
 static unsigned long kvm_hypercall(unsigned long *in,
unsigned long *out,
unsigned long nr) {
 return EV_UNIMPLEMENTED;
 }
 
 #endif
 


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 2/2] arm64: KVM: Yield CPU when vcpu executes a WFE

2013-10-07 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Marc Zyngier [mailto:marc.zyng...@arm.com]
 Sent: Monday, October 07, 2013 9:11 PM
 To: linux-arm-ker...@lists.infradead.org; kvm...@lists.cs.columbia.edu;
 kvm@vger.kernel.org
 Subject: [PATCH 2/2] arm64: KVM: Yield CPU when vcpu executes a WFE
 
 On an (even slightly) oversubscribed system, spinlocks are quickly becoming a
 bottleneck, as some vcpus are spinning, waiting for a lock to be released, 
 while
 the vcpu holding the lock may not be running at all.
 
 The solution is to trap blocking WFEs and tell KVM that we're now spinning. 
 This
 ensures that other vcpus will get a scheduling boost, allowing the lock to be
 released more quickly.
 
 Signed-off-by: Marc Zyngier marc.zyng...@arm.com
 ---
  arch/arm64/include/asm/kvm_arm.h |  8 ++--
  arch/arm64/kvm/handle_exit.c | 18 +-
  2 files changed, 19 insertions(+), 7 deletions(-)
 
 diff --git a/arch/arm64/include/asm/kvm_arm.h 
 b/arch/arm64/include/asm/kvm_arm.h
 index a5f28e2..c98ef47 100644
 --- a/arch/arm64/include/asm/kvm_arm.h
 +++ b/arch/arm64/include/asm/kvm_arm.h
 @@ -63,6 +63,7 @@
   * TAC:  Trap ACTLR
   * TSC:  Trap SMC
   * TSW:  Trap cache operations by set/way
 + * TWE:  Trap WFE
   * TWI:  Trap WFI
   * TIDCP:Trap L2CTLR/L2ECTLR
   * BSU_IS:   Upgrade barriers to the inner shareable domain
 @@ -72,8 +73,9 @@
   * FMO:  Override CPSR.F and enable signaling with VF
   * SWIO: Turn set/way invalidates into set/way clean+invalidate
   */
 -#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | 
 \
 -  HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
 +#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
 +  HCR_BSU_IS | HCR_FB | HCR_TAC | \
 +  HCR_AMO | HCR_IMO | HCR_FMO | \
HCR_SWIO | HCR_TIDCP | HCR_RW)
  #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
 
 @@ -242,4 +244,6 @@
 
  #define ESR_EL2_EC_xABT_xFSR_EXTABT  0x10
 
 +#define ESR_EL2_EC_WFI_ISS_WFE   (1  0)

In another patch this is named WHI_IS_WFE whereas here it is WFI_ISS_WFE; it 
looks like a typo. Anyway, what I am interested in understanding is: what does 
this macro mean?

Thanks
-Bharat

 +
  #endif /* __ARM64_KVM_ARM_H__ */
 diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index
 9beaca03..8da5606 100644
 --- a/arch/arm64/kvm/handle_exit.c
 +++ b/arch/arm64/kvm/handle_exit.c
 @@ -47,21 +47,29 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct 
 kvm_run
 *run)  }
 
  /**
 - * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a
 guest
 + * kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event
 + *   instruction executed by a guest
 + *
   * @vcpu:the vcpu pointer
   *
 - * Simply call kvm_vcpu_block(), which will halt execution of
 + * WFE: Yield the CPU and come back to this vcpu when the scheduler
 + * decides to.
 + * WFI: Simply call kvm_vcpu_block(), which will halt execution of
   * world-switches and schedule other host processes until there is an
   * incoming IRQ or FIQ to the VM.
   */
 -static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
 +static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
  {
 - kvm_vcpu_block(vcpu);
 + if (kvm_vcpu_get_hsr(vcpu)  ESR_EL2_EC_WFI_ISS_WFE)
 + kvm_vcpu_on_spin(vcpu);
 + else
 + kvm_vcpu_block(vcpu);
 +
   return 1;
  }
 
  static exit_handle_fn arm_exit_handlers[] = {
 - [ESR_EL2_EC_WFI]= kvm_handle_wfi,
 + [ESR_EL2_EC_WFI]= kvm_handle_wfx,
   [ESR_EL2_EC_CP15_32]= kvm_handle_cp15_32,
   [ESR_EL2_EC_CP15_64]= kvm_handle_cp15_64,
   [ESR_EL2_EC_CP14_MR]= kvm_handle_cp14_access,
 --
 1.8.2.3
 
 
 
 ___
 kvmarm mailing list
 kvm...@lists.cs.columbia.edu
 https://lists.cs.columbia.edu/cucslists/listinfo/kvmarm


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to epapr_hypercall()

2013-10-07 Thread Bhushan Bharat-R65777


 -Original Message-
 From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-ow...@vger.kernel.org] On
 Behalf Of Alexander Graf
 Sent: Monday, October 07, 2013 9:16 PM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; kvm-...@vger.kernel.org; kvm@vger.kernel.org
 Subject: Re: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to
 epapr_hypercall()
 
 
 On 07.10.2013, at 17:43, Bhushan Bharat-R65777 r65...@freescale.com wrote:
 
  at least when I can avoid it. With the current code the
  compiler would be
  smart enough to just optimize out the complete branch.
 
  Sure.  My point is, where would you be calling that where the
  entire file isn't predicated on (or selecting) CONFIG_KVM_GUEST
  or
  similar?
 
  We don't do these stubs for every single function in the kernel
  -- only ones where the above is a reasonable use case.
 
  Yeah, I'm fine on dropping it, but we need to make that a
  conscious decision
  and verify that no caller relies on it.
 
  kvm_para_has_feature() is called from arch/powerpc/kernel/kvm.c,
  arch/x86/kernel/kvm.c, and arch/x86/kernel/kvmclock.c, all of
  which are enabled by CONFIG_KVM_GUEST.
 
  I did find one example of kvm_para_available() being used in an
  unexpected place
  -- sound/pci/intel8x0.c.  It defines its own non-CONFIG_KVM_GUEST
  stub, even though x86 defines kvm_para_available() using inline
  CPUID stuff which should work without CONFIG_KVM_GUEST.
  I'm not sure why it even needs to do that, though -- shouldn't
  the subsequent PCI subsystem vendor/device check should be sufficient?
  No hypercalls are involved.
 
  That said, the possibility that some random driver might want to
  make use of paravirt features is a decent argument for keeping the 
  stub.
 
 
  I am not sure where we are agreeing on?
  Do we want to remove the stub in
  arch/powerpc/include/asm/kvm_para.h
  ? as
  there is no caller without KVM_GUEST and in future caller ensure
  this to be called only from code selected by KVM_GUEST?
 
  Or let this stub stay to avoid any random driver calling this ?
 
  I think the most reasonable way forward is to add a stub for
  non-CONFIG_EPAPR to the epapr code, then replace the kvm bits with
  generic epapr bits (which your patches already do).
 
  Please describe which stub you are talking about.
 
  kvm_hypercall is always available, regardless of the config option,
  which makes all its subfunctions always available as well.
 
  This patch renames kvm_hypercall() to epapr_hypercall() and which is always
 available. And the kvm_hypercall() friends now directly calls 
 epapr_hypercall().
  IIUC, So what you are trying to say is let the kvm_hypercall() friends keep 
  on
 calling kvm_hypercall() itself and a sub something like this:
 
 No, what I'm saying is that we either
 
   a) drop the whole #ifndef code path consciously. This would have to be a
 separate patch with a separate discussion. It's orthogonal to combining
 kvm_hypercall() and epapr_hypercall()
 
   b) add the #ifndef path to epapr_hypercall()

Do you mean like this in arch/powerpc/include/asm/epapr_hcalls.h

#ifdef CONFIG_KVM_GUEST
static inline unsigned long epapr_hypercall(unsigned long *in,
   unsigned long *out,
   unsigned long nr)
{
 // code for this function
} 
#else
static inline unsigned long epapr_hypercall(unsigned long *in,
   unsigned long *out,
   unsigned long nr)
{
return EV_UNIMPLEMENTED;
}
#endif

 
 I prefer b, Scott prefers b.
 
 
 Alex
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body
 of a message to majord...@vger.kernel.org More majordomo info at
 http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to epapr_hypercall()

2013-10-07 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Monday, October 07, 2013 9:43 PM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; kvm-...@vger.kernel.org; kvm@vger.kernel.org
 Subject: Re: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to
 epapr_hypercall()
 
 
 On 07.10.2013, at 18:04, Bhushan Bharat-R65777 r65...@freescale.com wrote:
 
 
 
  -Original Message-
  From: kvm-ppc-ow...@vger.kernel.org
  [mailto:kvm-ppc-ow...@vger.kernel.org] On Behalf Of Alexander Graf
  Sent: Monday, October 07, 2013 9:16 PM
  To: Bhushan Bharat-R65777
  Cc: Wood Scott-B07421; kvm-...@vger.kernel.org; kvm@vger.kernel.org
  Subject: Re: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to
  epapr_hypercall()
 
 
  On 07.10.2013, at 17:43, Bhushan Bharat-R65777 r65...@freescale.com 
  wrote:
 
  at least when I can avoid it. With the current code the
  compiler would be
  smart enough to just optimize out the complete branch.
 
  Sure.  My point is, where would you be calling that where the
  entire file isn't predicated on (or selecting)
  CONFIG_KVM_GUEST or
  similar?
 
  We don't do these stubs for every single function in the
  kernel
  -- only ones where the above is a reasonable use case.
 
  Yeah, I'm fine on dropping it, but we need to make that a
  conscious decision
  and verify that no caller relies on it.
 
  kvm_para_has_feature() is called from
  arch/powerpc/kernel/kvm.c, arch/x86/kernel/kvm.c, and
  arch/x86/kernel/kvmclock.c, all of which are enabled by
 CONFIG_KVM_GUEST.
 
  I did find one example of kvm_para_available() being used in an
  unexpected place
  -- sound/pci/intel8x0.c.  It defines its own
  non-CONFIG_KVM_GUEST stub, even though x86 defines
  kvm_para_available() using inline CPUID stuff which should work 
  without
 CONFIG_KVM_GUEST.
  I'm not sure why it even needs to do that, though -- shouldn't
  the subsequent PCI subsystem vendor/device check be
  sufficient?
  No hypercalls are involved.
 
  That said, the possibility that some random driver might want
  to make use of paravirt features is a decent argument for keeping the
 stub.
 
 
  I am not sure where we are agreeing on?
  Do we want to remove the stub in
  arch/powerpc/include/asm/kvm_para.h
  ? as
  there is no caller without KVM_GUEST and in future caller ensure
  this to be called only from code selected by KVM_GUEST?
 
  Or let this stub stay to avoid any random driver calling this ?
 
  I think the most reasonable way forward is to add a stub for
  non-CONFIG_EPAPR to the epapr code, then replace the kvm bits
  with generic epapr bits (which your patches already do).
 
  Please describe which stub you are talking about.
 
  kvm_hypercall is always available, regardless of the config option,
  which makes all its subfunctions always available as well.
 
  This patch renames kvm_hypercall() to epapr_hypercall(), which is
  always
  available. And the kvm_hypercall() friends now directly call
 epapr_hypercall().
  IIUC, what you are trying to say is: let the kvm_hypercall()
  friends keep on
  calling kvm_hypercall() itself, and add a stub something like this:
 
  No, what I'm saying is that we either
 
   a) drop the whole #ifndef code path consciously. This would have to
  be a separate patch with a separate discussion. It's orthogonal to
  combining
  kvm_hypercall() and epapr_hypercall()
 
   b) add the #ifndef path to epapr_hypercall()
 
  Do you mean like this in arch/powerpc/include/asm/epapr_hcalls.h
 
  #ifdef CONFIG_KVM_GUEST
 
 CONFIG_EPAPR_PARAVIRT

Yes, I was getting confused about why only KVM_GUEST, as this is not specific to
KVM_GUEST.
Thank you

 
 Apart from that, yes, I think that's what we want.
 
 
 Alex
 
  static inline unsigned long epapr_hypercall(unsigned long *in,
                                              unsigned long *out,
                                              unsigned long nr)
  {
          // code for this function
  }
  #else
  static inline unsigned long epapr_hypercall(unsigned long *in,
                                              unsigned long *out,
                                              unsigned long nr)
  {
          return EV_UNIMPLEMENTED;
  }
  #endif
 
 


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-07 Thread Bhushan Bharat-R65777
   Do you really want module dependencies between vfio and your core
   kernel MSI setup?  Look at the vfio external user interface that we've
 already defined.
   That allows other components of the kernel to get a proper reference
   to a vfio group.  From there you can work out how to get what you
   want.  Another alternative is that vfio could register an MSI to
   IOVA mapping with architecture code when the mapping is created.
   The MSI setup path could then do a lookup in architecture code for
   the mapping.  You could even store the MSI to IOVA mapping in VFIO
   and create an interface where SET_IRQ passes that mapping into setup code.
 
  Ok, What I want is to get IOVA associated with a physical address
  (physical address of MSI-bank).
  And currently I do not see a way to know IOVA of a physical address
  and doing all this domain get and then search through all of
  iommu-windows of that domain.
 
  What if we add an iommu-API which can return the IOVA mapping of a
  physical address. Current use case is setting up MSI's for aperture
  type of IOMMU also getting a phys_to_iova() mapping is independent of
  VFIO, your thought?
 
 A physical address can be mapped to multiple IOVAs, so the interface seems
 flawed by design.  It also has the same problem as above, it's a backdoor that
 can be called asynchronous to the owner of the domain, so what reason is there
 to believe the result?  It just replaces an iommu_domain pointer with an IOVA.
 VFIO knows this mapping, so why are we trying to go behind its back and ask 
 the
 IOMMU?
IOMMU is the final place where the mapping is created, so maybe today it is
called on behalf of VFIO, and tomorrow it can be for normal Linux or some other
interface. But I am fine with talking directly to VFIO and will not try to solve a
problem which does not exist today.

The MSI subsystem knows the pdev (PCI device) and the physical address, so what interface
will it use to get the IOVA from VFIO?

Thanks
-Bharat

  Thanks,
 
 Alex
 

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to epapr_hypercall()

2013-10-07 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Friday, October 04, 2013 4:46 PM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org; k...@vger.kernel.org
 Subject: Re: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to
 epapr_hypercall()
 
 
 On 04.10.2013, at 06:26, Bhushan Bharat-R65777 wrote:
 
 
 
  -Original Message-
  From: Wood Scott-B07421
  Sent: Thursday, October 03, 2013 12:04 AM
  To: Alexander Graf
  Cc: Bhushan Bharat-R65777; kvm-ppc@vger.kernel.org;
  k...@vger.kernel.org; Bhushan
  Bharat-R65777
  Subject: Re: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to
  epapr_hypercall()
 
  On Wed, 2013-10-02 at 19:54 +0200, Alexander Graf wrote:
  On 02.10.2013, at 19:49, Scott Wood wrote:
 
  On Wed, 2013-10-02 at 19:46 +0200, Alexander Graf wrote:
  On 02.10.2013, at 19:42, Scott Wood wrote:
 
  On Wed, 2013-10-02 at 19:17 +0200, Alexander Graf wrote:
  On 02.10.2013, at 19:04, Scott Wood wrote:
 
  On Wed, 2013-10-02 at 18:53 +0200, Alexander Graf wrote:
  On 02.10.2013, at 18:40, Scott Wood wrote:
 
  On Wed, 2013-10-02 at 16:19 +0200, Alexander Graf wrote:
  Won't this break when CONFIG_EPAPR_PARAVIRT=n? We wouldn't
  have
  epapr_hcalls.S compiled into the code base then and the bl above
  would reference an unknown function.
 
  KVM_GUEST selects EPAPR_PARAVIRT.
 
  But you can not select KVM_GUEST and still call these inline
  functions,
  no?
 
  No.
 
  Like kvm_arch_para_features().
 
  Where does that get called without KVM_GUEST?
 
  How would that work currently, with the call to kvm_hypercall()
  in arch/powerpc/kernel/kvm.c (which calls epapr_hypercall, BTW)?
 
  It wouldn't ever get called because kvm_hypercall() ends up
  always
  returning EV_UNIMPLEMENTED when #ifndef CONFIG_KVM_GUEST.
 
  OK, so the objection is to removing that stub?  Where would we
  actually want to call this without knowing that KVM_GUEST or
  EPAPR_PARAVIRT are enabled?
 
  In probing code. I usually prefer
 
  if (kvm_feature_available(X)) {
   ...
  }
 
  over
 
  #ifdef CONFIG_KVM_GUEST
  if (kvm_feature_available(X)) {
   ...
  }
  #endif
 
  at least when I can avoid it. With the current code the compiler
  would be
  smart enough to just optimize out the complete branch.
 
  Sure.  My point is, where would you be calling that where the
  entire file isn't predicated on (or selecting) CONFIG_KVM_GUEST or 
  similar?
 
  We don't do these stubs for every single function in the kernel --
  only ones where the above is a reasonable use case.
 
  Yeah, I'm fine on dropping it, but we need to make that a conscious
  decision
  and verify that no caller relies on it.
 
  kvm_para_has_feature() is called from arch/powerpc/kernel/kvm.c,
  arch/x86/kernel/kvm.c, and arch/x86/kernel/kvmclock.c, all of which
  are enabled by CONFIG_KVM_GUEST.
 
  I did find one example of kvm_para_available() being used in an
  unexpected place
  -- sound/pci/intel8x0.c.  It defines its own non-CONFIG_KVM_GUEST
  stub, even though x86 defines kvm_para_available() using inline CPUID
  stuff which should work without CONFIG_KVM_GUEST.
  I'm not sure why it even needs to do that, though -- shouldn't the
  subsequent PCI subsystem vendor/device check be sufficient?
  No hypercalls are involved.
 
  That said, the possibility that some random driver might want to make
  use of paravirt features is a decent argument for keeping the stub.
 
 
  I am not sure where we are agreeing on?
  Do we want to remove the stub in arch/powerpc/include/asm/kvm_para.h ? as
 there is no caller without KVM_GUEST and in future caller ensure this to be
 called only from code selected by KVM_GUEST?
 
  Or let this stub stay to avoid any random driver calling this ?
 
 I think the most reasonable way forward is to add a stub for non-CONFIG_EPAPR 
 to
 the epapr code, then replace the kvm bits with generic epapr bits (which your
 patches already do).

Please describe which stub you are talking about.

Thanks
-Bharat

 
 With that we should be 100% equivalent to today's code, just with a lot less
 lines of code :).
 
 
 Alex
 


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to epapr_hypercall()

2013-10-07 Thread Bhushan Bharat-R65777
  at least when I can avoid it. With the current code the compiler
  would be
  smart enough to just optimize out the complete branch.
 
  Sure.  My point is, where would you be calling that where the
  entire file isn't predicated on (or selecting) CONFIG_KVM_GUEST or
 similar?
 
  We don't do these stubs for every single function in the kernel
  -- only ones where the above is a reasonable use case.
 
  Yeah, I'm fine on dropping it, but we need to make that a
  conscious decision
  and verify that no caller relies on it.
 
  kvm_para_has_feature() is called from arch/powerpc/kernel/kvm.c,
  arch/x86/kernel/kvm.c, and arch/x86/kernel/kvmclock.c, all of which
  are enabled by CONFIG_KVM_GUEST.
 
  I did find one example of kvm_para_available() being used in an
  unexpected place
  -- sound/pci/intel8x0.c.  It defines its own non-CONFIG_KVM_GUEST
  stub, even though x86 defines kvm_para_available() using inline
  CPUID stuff which should work without CONFIG_KVM_GUEST.
  I'm not sure why it even needs to do that, though -- shouldn't the
  subsequent PCI subsystem vendor/device check be sufficient?
  No hypercalls are involved.
 
  That said, the possibility that some random driver might want to
  make use of paravirt features is a decent argument for keeping the stub.
 
 
  I am not sure where we are agreeing on?
  Do we want to remove the stub in arch/powerpc/include/asm/kvm_para.h
  ? as
  there is no caller without KVM_GUEST and in future caller ensure this
  to be called only from code selected by KVM_GUEST?
 
  Or let this stub stay to avoid any random driver calling this ?
 
  I think the most reasonable way forward is to add a stub for
  non-CONFIG_EPAPR to the epapr code, then replace the kvm bits with
  generic epapr bits (which your patches already do).
 
  Please describe which stub you are talking about.
 
 kvm_hypercall is always available, regardless of the config option, which 
 makes
 all its subfunctions always available as well.

This patch renames kvm_hypercall() to epapr_hypercall(), which is always
available. And the kvm_hypercall() friends now directly call epapr_hypercall().
IIUC, what you are trying to say is: let the kvm_hypercall() friends keep on
calling kvm_hypercall() itself, and add a stub something like this:

#ifdef CONFIG_KVM_GUEST
 
static unsigned long kvm_hypercall(unsigned long *in,
unsigned long *out,
unsigned long nr)
{
return epapr_hypercall(in, out, nr);
}
 
 #else
static unsigned long kvm_hypercall(unsigned long *in,
unsigned long *out,
unsigned long nr) {
 return EV_UNIMPLEMENTED;
}
-

I am still not really convinced about why we want to keep this stub where we 
know this is not called outside KVM_GUEST and calling this without KVM_GUEST is 
debatable.

Thanks
-Bharat

Thanks
-Bharat

 
 
 Alex
 
 ---
 
 #ifdef CONFIG_KVM_GUEST
 
 #include <linux/of.h>
 
 static inline int kvm_para_available(void) {
 struct device_node *hyper_node;
 
 hyper_node = of_find_node_by_path("/hypervisor");
 if (!hyper_node)
 return 0;
 
 if (!of_device_is_compatible(hyper_node, "linux,kvm"))
 return 0;
 
 return 1;
 }
 
 extern unsigned long kvm_hypercall(unsigned long *in,
unsigned long *out,
unsigned long nr);
 
 #else
 
 static inline int kvm_para_available(void) {
 return 0;
 }
 
 static unsigned long kvm_hypercall(unsigned long *in,
unsigned long *out,
unsigned long nr) {
 return EV_UNIMPLEMENTED;
 }
 
 #endif
 


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to epapr_hypercall()

2013-10-07 Thread Bhushan Bharat-R65777


 -Original Message-
 From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-ow...@vger.kernel.org] On
 Behalf Of Alexander Graf
 Sent: Monday, October 07, 2013 9:16 PM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org; k...@vger.kernel.org
 Subject: Re: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to
 epapr_hypercall()
 
 
 On 07.10.2013, at 17:43, Bhushan Bharat-R65777 r65...@freescale.com wrote:
 
  at least when I can avoid it. With the current code the
  compiler would be
  smart enough to just optimize out the complete branch.
 
  Sure.  My point is, where would you be calling that where the
  entire file isn't predicated on (or selecting) CONFIG_KVM_GUEST
  or
  similar?
 
  We don't do these stubs for every single function in the kernel
  -- only ones where the above is a reasonable use case.
 
  Yeah, I'm fine on dropping it, but we need to make that a
  conscious decision
  and verify that no caller relies on it.
 
  kvm_para_has_feature() is called from arch/powerpc/kernel/kvm.c,
  arch/x86/kernel/kvm.c, and arch/x86/kernel/kvmclock.c, all of
  which are enabled by CONFIG_KVM_GUEST.
 
  I did find one example of kvm_para_available() being used in an
  unexpected place
  -- sound/pci/intel8x0.c.  It defines its own non-CONFIG_KVM_GUEST
  stub, even though x86 defines kvm_para_available() using inline
  CPUID stuff which should work without CONFIG_KVM_GUEST.
  I'm not sure why it even needs to do that, though -- shouldn't
  the subsequent PCI subsystem vendor/device check be sufficient?
  No hypercalls are involved.
 
  That said, the possibility that some random driver might want to
  make use of paravirt features is a decent argument for keeping the 
  stub.
 
 
  I am not sure where we are agreeing on?
  Do we want to remove the stub in
  arch/powerpc/include/asm/kvm_para.h
  ? as
  there is no caller without KVM_GUEST and in future caller ensure
  this to be called only from code selected by KVM_GUEST?
 
  Or let this stub stay to avoid any random driver calling this ?
 
  I think the most reasonable way forward is to add a stub for
  non-CONFIG_EPAPR to the epapr code, then replace the kvm bits with
  generic epapr bits (which your patches already do).
 
  Please describe which stub you are talking about.
 
  kvm_hypercall is always available, regardless of the config option,
  which makes all its subfunctions always available as well.
 
  This patch renames kvm_hypercall() to epapr_hypercall(), which is always
 available. And the kvm_hypercall() friends now directly call
 epapr_hypercall().
  IIUC, what you are trying to say is: let the kvm_hypercall() friends keep
  on
 calling kvm_hypercall() itself, and add a stub something like this:
 
 No, what I'm saying is that we either
 
   a) drop the whole #ifndef code path consciously. This would have to be a
 separate patch with a separate discussion. It's orthogonal to combining
 kvm_hypercall() and epapr_hypercall()
 
   b) add the #ifndef path to epapr_hypercall()

Do you mean like this in arch/powerpc/include/asm/epapr_hcalls.h

#ifdef CONFIG_KVM_GUEST
static inline unsigned long epapr_hypercall(unsigned long *in,
   unsigned long *out,
   unsigned long nr)
{
 // code for this function
} 
#else
static inline unsigned long epapr_hypercall(unsigned long *in,
   unsigned long *out,
   unsigned long nr)
{
return EV_UNIMPLEMENTED;
}
#endif

 
 I prefer b, Scott prefers b.
 
 
 Alex
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body
 of a message to majord...@vger.kernel.org More majordomo info at
 http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to epapr_hypercall()

2013-10-07 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Monday, October 07, 2013 9:43 PM
 To: Bhushan Bharat-R65777
 Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org; k...@vger.kernel.org
 Subject: Re: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to
 epapr_hypercall()
 
 
 On 07.10.2013, at 18:04, Bhushan Bharat-R65777 r65...@freescale.com wrote:
 
 
 
  -Original Message-
  From: kvm-ppc-ow...@vger.kernel.org
  [mailto:kvm-ppc-ow...@vger.kernel.org] On Behalf Of Alexander Graf
  Sent: Monday, October 07, 2013 9:16 PM
  To: Bhushan Bharat-R65777
  Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org; k...@vger.kernel.org
  Subject: Re: [PATCH 1/2] kvm/powerpc: rename kvm_hypercall() to
  epapr_hypercall()
 
 
  On 07.10.2013, at 17:43, Bhushan Bharat-R65777 r65...@freescale.com 
  wrote:
 
  at least when I can avoid it. With the current code the
  compiler would be
  smart enough to just optimize out the complete branch.
 
  Sure.  My point is, where would you be calling that where the
  entire file isn't predicated on (or selecting)
  CONFIG_KVM_GUEST or
  similar?
 
  We don't do these stubs for every single function in the
  kernel
  -- only ones where the above is a reasonable use case.
 
  Yeah, I'm fine on dropping it, but we need to make that a
  conscious decision
  and verify that no caller relies on it.
 
  kvm_para_has_feature() is called from
  arch/powerpc/kernel/kvm.c, arch/x86/kernel/kvm.c, and
  arch/x86/kernel/kvmclock.c, all of which are enabled by
 CONFIG_KVM_GUEST.
 
  I did find one example of kvm_para_available() being used in an
  unexpected place
  -- sound/pci/intel8x0.c.  It defines its own
  non-CONFIG_KVM_GUEST stub, even though x86 defines
  kvm_para_available() using inline CPUID stuff which should work 
  without
 CONFIG_KVM_GUEST.
  I'm not sure why it even needs to do that, though -- shouldn't
  the subsequent PCI subsystem vendor/device check be
  sufficient?
  No hypercalls are involved.
 
  That said, the possibility that some random driver might want
  to make use of paravirt features is a decent argument for keeping the
 stub.
 
 
  I am not sure where we are agreeing on?
  Do we want to remove the stub in
  arch/powerpc/include/asm/kvm_para.h
  ? as
  there is no caller without KVM_GUEST and in future caller ensure
  this to be called only from code selected by KVM_GUEST?
 
  Or let this stub stay to avoid any random driver calling this ?
 
  I think the most reasonable way forward is to add a stub for
  non-CONFIG_EPAPR to the epapr code, then replace the kvm bits
  with generic epapr bits (which your patches already do).
 
  Please describe which stub you are talking about.
 
  kvm_hypercall is always available, regardless of the config option,
  which makes all its subfunctions always available as well.
 
  This patch renames kvm_hypercall() to epapr_hypercall(), which is
  always
  available. And the kvm_hypercall() friends now directly call
 epapr_hypercall().
  IIUC, what you are trying to say is: let the kvm_hypercall()
  friends keep on
  calling kvm_hypercall() itself, and add a stub something like this:
 
  No, what I'm saying is that we either
 
   a) drop the whole #ifndef code path consciously. This would have to
  be a separate patch with a separate discussion. It's orthogonal to
  combining
  kvm_hypercall() and epapr_hypercall()
 
   b) add the #ifndef path to epapr_hypercall()
 
  Do you mean like this in arch/powerpc/include/asm/epapr_hcalls.h
 
  #ifdef CONFIG_KVM_GUEST
 
 CONFIG_EPAPR_PARAVIRT

Yes, I was getting confused about why only KVM_GUEST, as this is not specific to
KVM_GUEST.
Thank you

 
 Apart from that, yes, I think that's what we want.
 
 
 Alex
 
  static inline unsigned long epapr_hypercall(unsigned long *in,
                                              unsigned long *out,
                                              unsigned long nr)
  {
          // code for this function
  }
  #else
  static inline unsigned long epapr_hypercall(unsigned long *in,
                                              unsigned long *out,
                                              unsigned long nr)
  {
          return EV_UNIMPLEMENTED;
  }
  #endif
 
 


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-07 Thread Bhushan Bharat-R65777
   Do you really want module dependencies between vfio and your core
   kernel MSI setup?  Look at the vfio external user interface that we've
 already defined.
   That allows other components of the kernel to get a proper reference
   to a vfio group.  From there you can work out how to get what you
   want.  Another alternative is that vfio could register an MSI to
   IOVA mapping with architecture code when the mapping is created.
   The MSI setup path could then do a lookup in architecture code for
   the mapping.  You could even store the MSI to IOVA mapping in VFIO
   and create an interface where SET_IRQ passes that mapping into setup code.
 
  Ok, What I want is to get IOVA associated with a physical address
  (physical address of MSI-bank).
  And currently I do not see a way to know IOVA of a physical address
  and doing all this domain get and then search through all of
  iommu-windows of that domain.
 
  What if we add an iommu-API which can return the IOVA mapping of a
  physical address. Current use case is setting up MSI's for aperture
  type of IOMMU also getting a phys_to_iova() mapping is independent of
  VFIO, your thought?
 
 A physical address can be mapped to multiple IOVAs, so the interface seems
 flawed by design.  It also has the same problem as above, it's a backdoor that
 can be called asynchronous to the owner of the domain, so what reason is there
 to believe the result?  It just replaces an iommu_domain pointer with an IOVA.
 VFIO knows this mapping, so why are we trying to go behind its back and ask 
 the
 IOMMU?
IOMMU is the final place where the mapping is created, so maybe today it is
called on behalf of VFIO, and tomorrow it can be for normal Linux or some other
interface. But I am fine with talking directly to VFIO and will not try to solve a
problem which does not exist today.

The MSI subsystem knows the pdev (PCI device) and the physical address, so what interface
will it use to get the IOVA from VFIO?

Thanks
-Bharat

  Thanks,
 
 Alex
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-06 Thread Bhushan Bharat-R65777


> -Original Message-
> From: Alex Williamson [mailto:alex.william...@redhat.com]
> Sent: Friday, October 04, 2013 11:42 PM
> To: Bhushan Bharat-R65777
> Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
> linux-
> ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux-
> p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
> foundation.org
> Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> 
> On Fri, 2013-10-04 at 17:23 +0000, Bhushan Bharat-R65777 wrote:
> >
> > > -Original Message-
> > > From: Alex Williamson [mailto:alex.william...@redhat.com]
> > > Sent: Friday, October 04, 2013 10:43 PM
> > > To: Bhushan Bharat-R65777
> > > Cc: j...@8bytes.org; b...@kernel.crashing.org;
> > > ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
> > > linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
> > > ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org
> > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > device
> > >
> > > On Fri, 2013-10-04 at 16:47 +, Bhushan Bharat-R65777 wrote:
> > > >
> > > > > -----Original Message-
> > > > > From: Alex Williamson [mailto:alex.william...@redhat.com]
> > > > > Sent: Friday, October 04, 2013 9:15 PM
> > > > > To: Bhushan Bharat-R65777
> > > > > Cc: j...@8bytes.org; b...@kernel.crashing.org;
> > > > > ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
> > > > > linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
> > > > > ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > foundation.org
> > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > > device
> > > > >
> > > > > On Fri, 2013-10-04 at 09:54 +, Bhushan Bharat-R65777 wrote:
> > > > > >
> > > > > > > -Original Message-
> > > > > > > From: linux-pci-ow...@vger.kernel.org
> > > > > > > [mailto:linux-pci-ow...@vger.kernel.org]
> > > > > > > On Behalf Of Alex Williamson
> > > > > > > Sent: Wednesday, September 25, 2013 10:16 PM
> > > > > > > To: Bhushan Bharat-R65777
> > > > > > > Cc: j...@8bytes.org; b...@kernel.crashing.org;
> > > > > > > ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
> > > > > > > linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
> > > > > > > ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > > > foundation.org; Bhushan Bharat-R65777
> > > > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain
> > > > > > > of a device
> > > > > > >
> > > > > > > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > > > > > > > This api return the iommu domain to which the device is 
> > > > > > > > attached.
> > > > > > > > The iommu_domain is required for making API calls related to
> iommu.
> > > > > > > > > Follow up patches which use this API to know iommu mapping.
> > > > > > > >
> > > > > > > > Signed-off-by: Bharat Bhushan
> > > > > > > > 
> > > > > > > > ---
> > > > > > > >  drivers/iommu/iommu.c |   10 ++
> > > > > > > >  include/linux/iommu.h |7 +++
> > > > > > > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > > > > > > > index
> > > > > > > > fbe9ca7..6ac5f50 100644
> > > > > > > > --- a/drivers/iommu/iommu.c
> > > > > > > > +++ b/drivers/iommu/iommu.c
> > > > > > > > @@ -696,6 +696,16 @@ void iommu_detach_device(struct
> > > > > > > > iommu_domain *domain, struct device *dev)  }
> > > > > > > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > > > > > > >
> > > > > > > > +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
> > > > > > > > +   struct iommu_ops *ops = dev->bus->iommu_ops;
>

RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-06 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Alex Williamson [mailto:alex.william...@redhat.com]
 Sent: Friday, October 04, 2013 11:42 PM
 To: Bhushan Bharat-R65777
 Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
 linux-
 ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux-
 p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
 foundation.org
 Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
 
 On Fri, 2013-10-04 at 17:23 +0000, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: Alex Williamson [mailto:alex.william...@redhat.com]
   Sent: Friday, October 04, 2013 10:43 PM
   To: Bhushan Bharat-R65777
   Cc: j...@8bytes.org; b...@kernel.crashing.org;
   ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
   linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
   ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org
   Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
   device
  
   On Fri, 2013-10-04 at 16:47 +, Bhushan Bharat-R65777 wrote:
   
 -Original Message-
 From: Alex Williamson [mailto:alex.william...@redhat.com]
 Sent: Friday, October 04, 2013 9:15 PM
 To: Bhushan Bharat-R65777
 Cc: j...@8bytes.org; b...@kernel.crashing.org;
 ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
 linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
 ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
 foundation.org
 Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
 device

 On Fri, 2013-10-04 at 09:54 +, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: linux-pci-ow...@vger.kernel.org
   [mailto:linux-pci-ow...@vger.kernel.org]
   On Behalf Of Alex Williamson
   Sent: Wednesday, September 25, 2013 10:16 PM
   To: Bhushan Bharat-R65777
   Cc: j...@8bytes.org; b...@kernel.crashing.org;
   ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
   linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
   ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
   foundation.org; Bhushan Bharat-R65777
   Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain
   of a device
  
   On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
This api return the iommu domain to which the device is 
attached.
The iommu_domain is required for making API calls related to
 iommu.
Follow up patches which use this API to know iommu mapping.
   
Signed-off-by: Bharat Bhushan
bharat.bhus...@freescale.com
---
 drivers/iommu/iommu.c |   10 ++
 include/linux/iommu.h |7 +++
 2 files changed, 17 insertions(+), 0 deletions(-)
   
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index
fbe9ca7..6ac5f50 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -696,6 +696,16 @@ void iommu_detach_device(struct
iommu_domain *domain, struct device *dev)  }
EXPORT_SYMBOL_GPL(iommu_detach_device);
   
+struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
+   struct iommu_ops *ops = dev->bus->iommu_ops;
+
+   if (unlikely(ops == NULL || ops->get_dev_iommu_domain ==
 NULL))
+   return NULL;
+
+   return ops->get_dev_iommu_domain(dev); }
+EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
  
   What prevents this from racing iommu_domain_free()?  There's
   no references acquired, so there's no reason for the caller
   to assume the
 pointer is valid.
 
  Sorry for late query, somehow this email went into a folder
  and escaped;
 
  Just to be sure, there is not lock at generic struct
  iommu_domain, but IP
 specific structure (link FSL domain) linked in
 iommu_domain->priv have a lock, so we need to ensure this race
 in FSL iommu code (say drivers/iommu/fsl_pamu_domain.c), right?

 No, it's not sufficient to make sure that your use of the
 interface is race free.  The interface itself needs to be
 designed so that it's difficult to use incorrectly.
   
So we can define iommu_get_dev_domain()/iommu_put_dev_domain();
iommu_get_dev_domain() will return domain with the lock held, and
iommu_put_dev_domain() will release the lock? And
iommu_get_dev_domain() must always be followed by
iommu_put_dev_domain().
  
   What lock?  get/put are generally used for reference counting, not
   locking in the kernel.
  
 That's not the case here.  This is a backdoor to get the iommu
 domain from the iommu driver regardless of who is using it or how.
 The iommu domain is created and managed by vfio, so shouldn't we
 be looking at how to do this through vfio?
   
Let me

RE: [PATCH 1/3 v6] kvm: powerpc: keep only pte search logic in lookup_linux_pte

2013-10-06 Thread Bhushan Bharat-R65777
Hi Paul,

 -Original Message-
 From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-ow...@vger.kernel.org] On
 Behalf Of Paul Mackerras
 Sent: Monday, October 07, 2013 4:39 AM
 To: Bhushan Bharat-R65777
 Cc: ag...@suse.de; kvm@vger.kernel.org; kvm-...@vger.kernel.org; Wood Scott-
 B07421; b...@kernel.crashing.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 1/3 v6] kvm: powerpc: keep only pte search logic in
 lookup_linux_pte
 
 On Fri, Oct 04, 2013 at 08:25:31PM +0530, Bharat Bhushan wrote:
  lookup_linux_pte() was searching for a pte and also sets access flags
  if writable. This function now searches only pte while access flag
  setting is done explicitly.


 
 So in order to reduce some code duplication, you have added code duplication 
 in
 the existing callers of this function.  I'm not convinced it's an overall win.

lookup_linux_pte(): as its name suggests, it is supposed to only look up a pte, 
but it does more than that (it also updates the pte). So I changed this function 
to only do the lookup (which also checks the size). I am not an MM expert, but I 
think we can improve this function as you suggested, by checking pte_present() 
only if _PAGE_BUSY is not set.

 What's left in this function is pretty trivial, just a call to
 find_linux_pte_or_hugepte() and some pagesize computations.  I would prefer 
 you
 found a way to do what you want without adding code duplication at the 
 existing
 call sites.

What about doing this way:
1) A function which will do the lookup for Linux pte. May be call that as 
lookup_linux_pte()
2) lookup + page update (what the existing function lookup_linux_pte() is 
doing). Will rename this function to lookup_linux_pte_and_update(), which will 
call above defined lookup_linux_pte()


Thanks
-Bharat

  Maybe you could have a new find_linux_pte_and_check_pagesize() and
 call that from the existing lookup_linux_pte().
 
 The other thing you've done, without commenting on why you have done it, is to
 add a pte_present check without having looked at _PAGE_BUSY.
 kvmppc_read_update_linux_pte() only checks _PAGE_PRESENT after checking that
 _PAGE_BUSY is clear, so this is a semantic change, which I think is wrong for
 server processors.
 
 So, on the whole, NACK from me for this patch.
 
 Paul.
 --
 To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body
 of a message to majord...@vger.kernel.org More majordomo info at
 http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 1/3 v6] kvm: powerpc: keep only pte search logic in lookup_linux_pte

2013-10-06 Thread Bhushan Bharat-R65777
Hi Paul,

 -Original Message-
 From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc-ow...@vger.kernel.org] On
 Behalf Of Paul Mackerras
 Sent: Monday, October 07, 2013 4:39 AM
 To: Bhushan Bharat-R65777
 Cc: ag...@suse.de; k...@vger.kernel.org; kvm-ppc@vger.kernel.org; Wood Scott-
 B07421; b...@kernel.crashing.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 1/3 v6] kvm: powerpc: keep only pte search logic in
 lookup_linux_pte
 
 On Fri, Oct 04, 2013 at 08:25:31PM +0530, Bharat Bhushan wrote:
  lookup_linux_pte() was searching for a pte and also sets access flags
  if writable. This function now searches only pte while access flag
  setting is done explicitly.


 
 So in order to reduce some code duplication, you have added code duplication 
 in
 the existing callers of this function.  I'm not convinced it's an overall win.

lookup_linux_pte(): as its name suggests, it is supposed to only look up a pte, 
but it does more than that (it also updates the pte). So I changed this function 
to only do the lookup (which also checks the size). I am not an MM expert, but I 
think we can improve this function as you suggested, by checking pte_present() 
only if _PAGE_BUSY is not set.

 What's left in this function is pretty trivial, just a call to
 find_linux_pte_or_hugepte() and some pagesize computations.  I would prefer 
 you
 found a way to do what you want without adding code duplication at the 
 existing
 call sites.

What about doing this way:
1) A function which will do the lookup for Linux pte. May be call that as 
lookup_linux_pte()
2) lookup + page update (what the existing function lookup_linux_pte() is 
doing). Will rename this function to lookup_linux_pte_and_update(), which will 
call above defined lookup_linux_pte()


Thanks
-Bharat

  Maybe you could have a new find_linux_pte_and_check_pagesize() and
 call that from the existing lookup_linux_pte().
 
 The other thing you've done, without commenting on why you have done it, is to
 add a pte_present check without having looked at _PAGE_BUSY.
 kvmppc_read_update_linux_pte() only checks _PAGE_PRESENT after checking that
 _PAGE_BUSY is clear, so this is a semantic change, which I think is wrong for
 server processors.
 
 So, on the whole, NACK from me for this patch.
 
 Paul.
 --
 To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body
 of a message to majord...@vger.kernel.org More majordomo info at
 http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-06 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Alex Williamson [mailto:alex.william...@redhat.com]
 Sent: Friday, October 04, 2013 11:42 PM
 To: Bhushan Bharat-R65777
 Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
 linux-
 ker...@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; linux-
 p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
 foundation.org
 Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
 
 On Fri, 2013-10-04 at 17:23 +, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: Alex Williamson [mailto:alex.william...@redhat.com]
   Sent: Friday, October 04, 2013 10:43 PM
   To: Bhushan Bharat-R65777
   Cc: j...@8bytes.org; b...@kernel.crashing.org;
   ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
   linuxppc-dev@lists.ozlabs.org; linux- p...@vger.kernel.org;
   ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org
   Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
   device
  
   On Fri, 2013-10-04 at 16:47 +, Bhushan Bharat-R65777 wrote:
   
 -Original Message-
 From: Alex Williamson [mailto:alex.william...@redhat.com]
 Sent: Friday, October 04, 2013 9:15 PM
 To: Bhushan Bharat-R65777
 Cc: j...@8bytes.org; b...@kernel.crashing.org;
 ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
 linuxppc-dev@lists.ozlabs.org; linux- p...@vger.kernel.org;
 ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
 foundation.org
 Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
 device

 On Fri, 2013-10-04 at 09:54 +, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: linux-pci-ow...@vger.kernel.org
   [mailto:linux-pci-ow...@vger.kernel.org]
   On Behalf Of Alex Williamson
   Sent: Wednesday, September 25, 2013 10:16 PM
   To: Bhushan Bharat-R65777
   Cc: j...@8bytes.org; b...@kernel.crashing.org;
   ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
   linuxppc-dev@lists.ozlabs.org; linux- p...@vger.kernel.org;
   ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
   foundation.org; Bhushan Bharat-R65777
   Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain
   of a device
  
   On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
This api return the iommu domain to which the device is 
attached.
The iommu_domain is required for making API calls related to
 iommu.
Follow up patches which use this API to know iommu maping.
   
Signed-off-by: Bharat Bhushan
bharat.bhus...@freescale.com
---
 drivers/iommu/iommu.c |   10 ++
 include/linux/iommu.h |7 +++
 2 files changed, 17 insertions(+), 0 deletions(-)
   
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index
fbe9ca7..6ac5f50 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -696,6 +696,16 @@ void iommu_detach_device(struct
iommu_domain *domain, struct device *dev)  }
EXPORT_SYMBOL_GPL(iommu_detach_device);
   
+struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
+   struct iommu_ops *ops = dev->bus->iommu_ops;
+
+   if (unlikely(ops == NULL || ops->get_dev_iommu_domain ==
 NULL))
+   return NULL;
+
+   return ops->get_dev_iommu_domain(dev); }
+EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
  
   What prevents this from racing iommu_domain_free()?  There's
   no references acquired, so there's no reason for the caller
   to assume the
 pointer is valid.
 
  Sorry for late query, somehow this email went into a folder
  and escaped;
 
  Just to be sure, there is not lock at generic struct
  iommu_domain, but IP
 specific structure (link FSL domain) linked in
 iommu_domain->priv have a lock, so we need to ensure this race
 in FSL iommu code (say drivers/iommu/fsl_pamu_domain.c), right?

 No, it's not sufficient to make sure that your use of the
 interface is race free.  The interface itself needs to be
 designed so that it's difficult to use incorrectly.
   
So we can define iommu_get_dev_domain()/iommu_put_dev_domain();
iommu_get_dev_domain() will return domain with the lock held, and
iommu_put_dev_domain() will release the lock? And
iommu_get_dev_domain() must always be followed by
iommu_put_dev_domain().
  
   What lock?  get/put are generally used for reference counting, not
   locking in the kernel.
  
 That's not the case here.  This is a backdoor to get the iommu
 domain from the iommu driver regardless of who is using it or how.
 The iommu domain is created and managed by vfio, so shouldn't we
 be looking at how to do this through vfio?
   
Let me

RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-04 Thread Bhushan Bharat-R65777


> -Original Message-
> From: Alex Williamson [mailto:alex.william...@redhat.com]
> Sent: Friday, October 04, 2013 10:43 PM
> To: Bhushan Bharat-R65777
> Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
> linux-
> ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux-
> p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
> foundation.org
> Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> 
> On Fri, 2013-10-04 at 16:47 +, Bhushan Bharat-R65777 wrote:
> >
> > > -Original Message-
> > > From: Alex Williamson [mailto:alex.william...@redhat.com]
> > > Sent: Friday, October 04, 2013 9:15 PM
> > > To: Bhushan Bharat-R65777
> > > Cc: j...@8bytes.org; b...@kernel.crashing.org;
> > > ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
> > > linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
> > > ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org
> > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > device
> > >
> > > On Fri, 2013-10-04 at 09:54 +, Bhushan Bharat-R65777 wrote:
> > > >
> > > > > -Original Message-
> > > > > From: linux-pci-ow...@vger.kernel.org
> > > > > [mailto:linux-pci-ow...@vger.kernel.org]
> > > > > On Behalf Of Alex Williamson
> > > > > Sent: Wednesday, September 25, 2013 10:16 PM
> > > > > To: Bhushan Bharat-R65777
> > > > > Cc: j...@8bytes.org; b...@kernel.crashing.org;
> > > > > ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
> > > > > linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
> > > > > ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
> > > > > foundation.org; Bhushan Bharat-R65777
> > > > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > > > device
> > > > >
> > > > > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > > > > > This api return the iommu domain to which the device is attached.
> > > > > > The iommu_domain is required for making API calls related to iommu.
> > > > > > Follow up patches which use this API to know iommu maping.
> > > > > >
> > > > > > Signed-off-by: Bharat Bhushan 
> > > > > > ---
> > > > > >  drivers/iommu/iommu.c |   10 ++
> > > > > >  include/linux/iommu.h |7 +++
> > > > > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > > > > > index
> > > > > > fbe9ca7..6ac5f50 100644
> > > > > > --- a/drivers/iommu/iommu.c
> > > > > > +++ b/drivers/iommu/iommu.c
> > > > > > @@ -696,6 +696,16 @@ void iommu_detach_device(struct
> > > > > > iommu_domain *domain, struct device *dev)  }
> > > > > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > > > > >
> > > > > > +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
> > > > > > +   struct iommu_ops *ops = dev->bus->iommu_ops;
> > > > > > +
> > > > > > +   if (unlikely(ops == NULL || ops->get_dev_iommu_domain == NULL))
> > > > > > +   return NULL;
> > > > > > +
> > > > > > +   return ops->get_dev_iommu_domain(dev); }
> > > > > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> > > > >
> > > > > What prevents this from racing iommu_domain_free()?  There's no
> > > > > references acquired, so there's no reason for the caller to
> > > > > assume the
> > > pointer is valid.
> > > >
> > > > Sorry for late query, somehow this email went into a folder and
> > > > escaped;
> > > >
> > > > Just to be sure, there is not lock at generic "struct
> > > > iommu_domain", but IP
> > > specific structure (link FSL domain) linked in iommu_domain->priv
> > > have a lock, so we need to ensure this race in FSL iommu code (say
> > > drivers/iommu/fsl_pamu_domain.c), right?
> > >
> > > No, it's not sufficient to make sure that your use of the interface
> > > is race free.  The interface itself needs to be designed so that
> &

RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-04 Thread Bhushan Bharat-R65777


> -Original Message-
> From: Alex Williamson [mailto:alex.william...@redhat.com]
> Sent: Friday, October 04, 2013 9:15 PM
> To: Bhushan Bharat-R65777
> Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
> linux-
> ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux-
> p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
> foundation.org
> Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> 
> On Fri, 2013-10-04 at 09:54 +, Bhushan Bharat-R65777 wrote:
> >
> > > -Original Message-
> > > From: linux-pci-ow...@vger.kernel.org
> > > [mailto:linux-pci-ow...@vger.kernel.org]
> > > On Behalf Of Alex Williamson
> > > Sent: Wednesday, September 25, 2013 10:16 PM
> > > To: Bhushan Bharat-R65777
> > > Cc: j...@8bytes.org; b...@kernel.crashing.org;
> > > ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
> > > linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
> > > ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org;
> > > Bhushan Bharat-R65777
> > > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > > device
> > >
> > > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > > > This api return the iommu domain to which the device is attached.
> > > > The iommu_domain is required for making API calls related to iommu.
> > > > Follow up patches which use this API to know iommu maping.
> > > >
> > > > Signed-off-by: Bharat Bhushan 
> > > > ---
> > > >  drivers/iommu/iommu.c |   10 ++
> > > >  include/linux/iommu.h |7 +++
> > > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > > >
> > > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index
> > > > fbe9ca7..6ac5f50 100644
> > > > --- a/drivers/iommu/iommu.c
> > > > +++ b/drivers/iommu/iommu.c
> > > > @@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain
> > > > *domain, struct device *dev)  }
> > > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > > >
> > > > +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
> > > > +   struct iommu_ops *ops = dev->bus->iommu_ops;
> > > > +
> > > > +   if (unlikely(ops == NULL || ops->get_dev_iommu_domain == NULL))
> > > > +   return NULL;
> > > > +
> > > > +   return ops->get_dev_iommu_domain(dev); }
> > > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> > >
> > > What prevents this from racing iommu_domain_free()?  There's no
> > > references acquired, so there's no reason for the caller to assume the
> pointer is valid.
> >
> > Sorry for late query, somehow this email went into a folder and
> > escaped;
> >
> > Just to be sure, there is not lock at generic "struct iommu_domain", but IP
> specific structure (link FSL domain) linked in iommu_domain->priv have a lock,
> so we need to ensure this race in FSL iommu code (say
> drivers/iommu/fsl_pamu_domain.c), right?
> 
> No, it's not sufficient to make sure that your use of the interface is race
> free.  The interface itself needs to be designed so that it's difficult to use
> incorrectly.

So we can define iommu_get_dev_domain()/iommu_put_dev_domain();  
iommu_get_dev_domain() will return domain with the lock held, and 
iommu_put_dev_domain() will release the lock? And iommu_get_dev_domain() must 
always be followed by iommu_put_dev_domain().


> That's not the case here.  This is a backdoor to get the iommu
> domain from the iommu driver regardless of who is using it or how.  The iommu
> domain is created and managed by vfio, so shouldn't we be looking at how to do
> this through vfio?

Let me first describe what we are doing here:
During initialization:-
 - vfio talks to MSI system to know the MSI-page and size
 - vfio then interacts with iommu to map the MSI-page in iommu (IOVA is decided 
by userspace and physical address is the MSI-page)
 - So the IOVA subwindow mapping is created in iommu and yes VFIO knows about 
this mapping.

Now do SET_IRQ(MSI/MSIX) ioctl:
 - calls pci_enable_msix()/pci_enable_msi_block(): which is supposed to set MSI 
address/data in device.
 - So in current implementation (this patchset) msi-subsystem gets the IOVA 
from iommu via this defined interface.
 - Are you saying that rather than getting this from iommu, we should get this 
from vfio? What difference does this make?

Thanks
-Bharat

> It seems like you'd want to us

RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-04 Thread Bhushan Bharat-R65777


> -Original Message-
> From: Bhushan Bharat-R65777
> Sent: Friday, October 04, 2013 3:24 PM
> To: 'Alex Williamson'
> Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
> linux-
> ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux-
> p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
> foundation.org
> Subject: RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> 
> 
> 
> > -Original Message-
> > From: linux-pci-ow...@vger.kernel.org
> > [mailto:linux-pci-ow...@vger.kernel.org]
> > On Behalf Of Alex Williamson
> > Sent: Wednesday, September 25, 2013 10:16 PM
> > To: Bhushan Bharat-R65777
> > Cc: j...@8bytes.org; b...@kernel.crashing.org;
> > ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
> > linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
> > ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org;
> > Bhushan Bharat-R65777
> > Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
> > device
> >
> > On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > > This api return the iommu domain to which the device is attached.
> > > The iommu_domain is required for making API calls related to iommu.
> > > Follow up patches which use this API to know iommu maping.
> > >
> > > Signed-off-by: Bharat Bhushan 
> > > ---
> > >  drivers/iommu/iommu.c |   10 ++
> > >  include/linux/iommu.h |7 +++
> > >  2 files changed, 17 insertions(+), 0 deletions(-)
> > >
> > > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index
> > > fbe9ca7..6ac5f50 100644
> > > --- a/drivers/iommu/iommu.c
> > > +++ b/drivers/iommu/iommu.c
> > > @@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain
> > > *domain, struct device *dev)  }
> > > EXPORT_SYMBOL_GPL(iommu_detach_device);
> > >
> > > +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
> > > + struct iommu_ops *ops = dev->bus->iommu_ops;
> > > +
> > > + if (unlikely(ops == NULL || ops->get_dev_iommu_domain == NULL))
> > > + return NULL;
> > > +
> > > + return ops->get_dev_iommu_domain(dev); }
> > > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> >
> > What prevents this from racing iommu_domain_free()?  There's no
> > references acquired, so there's no reason for the caller to assume the 
> > pointer
> is valid.
> 
> Sorry for late query, somehow this email went into a folder and escaped;
> 
> Just to be sure, there is not lock at generic "struct iommu_domain", but IP
> specific structure (link FSL domain) linked in iommu_domain->priv have a lock,
> so we need to ensure this race in FSL iommu code (say
> drivers/iommu/fsl_pamu_domain.c), right?

Further thinking of this, there are more problems here:
 - Like MSI subsystem will call iommu_get_dev_domain(), which will take a lock, 
find the domain pointer, release the lock, and return the domain
 - Now if domain is freed up
 - While MSI subsystem tries to do work on domain (like 
get_attribute/set_attribute etc) ???

So can we make iommu_get_dev_domain() return the domain with the lock held, 
and iommu_put_dev_domain() release the lock? And iommu_get_dev_domain() 
must always be followed by iommu_put_dev_domain()

Thanks
-Bharat

> 
> Thanks
> -Bharat
> 
> >
> > >  /*
> > >   * IOMMU groups are really the natrual working unit of the IOMMU, but
> > >   * the IOMMU API works on domains and devices.  Bridge that gap by
> > > diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
> > > 7ea319e..fa046bd 100644
> > > --- a/include/linux/iommu.h
> > > +++ b/include/linux/iommu.h
> > > @@ -127,6 +127,7 @@ struct iommu_ops {
> > >   int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
> > >   /* Get the numer of window per domain */
> > >   u32 (*domain_get_windows)(struct iommu_domain *domain);
> > > + struct iommu_domain *(*get_dev_iommu_domain)(struct device *dev);
> > >
> > >   unsigned long pgsize_bitmap;
> > >  };
> > > @@ -190,6 +191,7 @@ extern int iommu_domain_window_enable(struct
> > > iommu_domain
> > *domain, u32 wnd_nr,
> > > phys_addr_t offset, u64 size,
> > > int prot);
> > >  extern void iommu_domain_window_disable(struct iommu_domain
> > > *domain,
> > > u32 wnd_nr);
> > > +extern struct

RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-04 Thread Bhushan Bharat-R65777


> -Original Message-
> From: linux-pci-ow...@vger.kernel.org [mailto:linux-pci-ow...@vger.kernel.org]
> On Behalf Of Alex Williamson
> Sent: Wednesday, September 25, 2013 10:16 PM
> To: Bhushan Bharat-R65777
> Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
> linux-
> ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux-
> p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
> foundation.org; Bhushan Bharat-R65777
> Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
> 
> On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
> > This api return the iommu domain to which the device is attached.
> > The iommu_domain is required for making API calls related to iommu.
> > Follow up patches which use this API to know iommu maping.
> >
> > Signed-off-by: Bharat Bhushan 
> > ---
> >  drivers/iommu/iommu.c |   10 ++
> >  include/linux/iommu.h |7 +++
> >  2 files changed, 17 insertions(+), 0 deletions(-)
> >
> > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index
> > fbe9ca7..6ac5f50 100644
> > --- a/drivers/iommu/iommu.c
> > +++ b/drivers/iommu/iommu.c
> > @@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain
> > *domain, struct device *dev)  }
> > EXPORT_SYMBOL_GPL(iommu_detach_device);
> >
> > +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
> > +   struct iommu_ops *ops = dev->bus->iommu_ops;
> > +
> > +   if (unlikely(ops == NULL || ops->get_dev_iommu_domain == NULL))
> > +   return NULL;
> > +
> > +   return ops->get_dev_iommu_domain(dev); }
> > +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
> 
> What prevents this from racing iommu_domain_free()?  There's no references
> acquired, so there's no reason for the caller to assume the pointer is valid.

Sorry for late query, somehow this email went into a folder and escaped;

Just to be sure, there is no lock in the generic "struct iommu_domain", but the 
IP-specific structure (like the FSL domain) linked in iommu_domain->priv has a 
lock, so we need to handle this race in the FSL iommu code (say 
drivers/iommu/fsl_pamu_domain.c), right?

Thanks
-Bharat

> 
> >  /*
> >   * IOMMU groups are really the natrual working unit of the IOMMU, but
> >   * the IOMMU API works on domains and devices.  Bridge that gap by
> > diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
> > 7ea319e..fa046bd 100644
> > --- a/include/linux/iommu.h
> > +++ b/include/linux/iommu.h
> > @@ -127,6 +127,7 @@ struct iommu_ops {
> > int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
> > /* Get the numer of window per domain */
> > u32 (*domain_get_windows)(struct iommu_domain *domain);
> > +   struct iommu_domain *(*get_dev_iommu_domain)(struct device *dev);
> >
> > unsigned long pgsize_bitmap;
> >  };
> > @@ -190,6 +191,7 @@ extern int iommu_domain_window_enable(struct 
> > iommu_domain
> *domain, u32 wnd_nr,
> >   phys_addr_t offset, u64 size,
> >   int prot);
> >  extern void iommu_domain_window_disable(struct iommu_domain *domain,
> > u32 wnd_nr);
> > +extern struct iommu_domain *iommu_get_dev_domain(struct device *dev);
> >  /**
> >   * report_iommu_fault() - report about an IOMMU fault to the IOMMU 
> > framework
> >   * @domain: the iommu domain where the fault has happened @@ -284,6
> > +286,11 @@ static inline void iommu_domain_window_disable(struct
> > iommu_domain *domain,  {  }
> >
> > +static inline struct iommu_domain *iommu_get_dev_domain(struct device
> > +*dev) {
> > +   return NULL;
> > +}
> > +
> >  static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain
> > *domain, dma_addr_t iova)  {
> > return 0;
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in the 
> body
> of a message to majord...@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html



RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-04 Thread Bhushan Bharat-R65777


 -Original Message-
 From: linux-pci-ow...@vger.kernel.org [mailto:linux-pci-ow...@vger.kernel.org]
 On Behalf Of Alex Williamson
 Sent: Wednesday, September 25, 2013 10:16 PM
 To: Bhushan Bharat-R65777
 Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
 linux-
 ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux-
 p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
 foundation.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
 
 On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
  This api return the iommu domain to which the device is attached.
  The iommu_domain is required for making API calls related to iommu.
  Follow up patches which use this API to know iommu maping.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
   drivers/iommu/iommu.c |   10 ++
   include/linux/iommu.h |7 +++
   2 files changed, 17 insertions(+), 0 deletions(-)
 
  diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index
  fbe9ca7..6ac5f50 100644
  --- a/drivers/iommu/iommu.c
  +++ b/drivers/iommu/iommu.c
  @@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain
  *domain, struct device *dev)  }
  EXPORT_SYMBOL_GPL(iommu_detach_device);
 
  +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
  +   struct iommu_ops *ops = dev->bus->iommu_ops;
  +
  +   if (unlikely(ops == NULL || ops->get_dev_iommu_domain == NULL))
  +   return NULL;
  +
  +   return ops->get_dev_iommu_domain(dev); }
  +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
 
 What prevents this from racing iommu_domain_free()?  There's no references
 acquired, so there's no reason for the caller to assume the pointer is valid.

Sorry for late query, somehow this email went into a folder and escaped;

Just to be sure, there is no lock in the generic "struct iommu_domain", but the 
IP-specific structure (like the FSL domain) linked in iommu_domain->priv has a 
lock, so we need to handle this race in the FSL iommu code (say 
drivers/iommu/fsl_pamu_domain.c), right?

Thanks
-Bharat

 
   /*
* IOMMU groups are really the natrual working unit of the IOMMU, but
* the IOMMU API works on domains and devices.  Bridge that gap by
  diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
  7ea319e..fa046bd 100644
  --- a/include/linux/iommu.h
  +++ b/include/linux/iommu.h
  @@ -127,6 +127,7 @@ struct iommu_ops {
  int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
  /* Get the numer of window per domain */
  u32 (*domain_get_windows)(struct iommu_domain *domain);
  +   struct iommu_domain *(*get_dev_iommu_domain)(struct device *dev);
 
  unsigned long pgsize_bitmap;
   };
  @@ -190,6 +191,7 @@ extern int iommu_domain_window_enable(struct 
  iommu_domain
 *domain, u32 wnd_nr,
phys_addr_t offset, u64 size,
int prot);
   extern void iommu_domain_window_disable(struct iommu_domain *domain,
  u32 wnd_nr);
  +extern struct iommu_domain *iommu_get_dev_domain(struct device *dev);
   /**
* report_iommu_fault() - report about an IOMMU fault to the IOMMU 
  framework
* @domain: the iommu domain where the fault has happened @@ -284,6
  +286,11 @@ static inline void iommu_domain_window_disable(struct
  iommu_domain *domain,  {  }
 
  +static inline struct iommu_domain *iommu_get_dev_domain(struct device
  +*dev) {
  +   return NULL;
  +}
  +
   static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain
  *domain, dma_addr_t iova)  {
  return 0;
 
 
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-pci in the 
 body
 of a message to majord...@vger.kernel.org More majordomo info at
 http://vger.kernel.org/majordomo-info.html



RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-04 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Bhushan Bharat-R65777
 Sent: Friday, October 04, 2013 3:24 PM
 To: 'Alex Williamson'
 Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
 linux-
 ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux-
 p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
 foundation.org
 Subject: RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device
 
 
 
  -Original Message-
  From: linux-pci-ow...@vger.kernel.org
  [mailto:linux-pci-ow...@vger.kernel.org]
  On Behalf Of Alex Williamson
  Sent: Wednesday, September 25, 2013 10:16 PM
  To: Bhushan Bharat-R65777
  Cc: j...@8bytes.org; b...@kernel.crashing.org;
  ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
  linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
  ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org;
  Bhushan Bharat-R65777
  Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
  device
 
  On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
   This api return the iommu domain to which the device is attached.
   The iommu_domain is required for making API calls related to iommu.
   Follow up patches which use this API to know iommu maping.
  
   Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
   ---
drivers/iommu/iommu.c |   10 ++
include/linux/iommu.h |7 +++
2 files changed, 17 insertions(+), 0 deletions(-)
  
   diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index
   fbe9ca7..6ac5f50 100644
   --- a/drivers/iommu/iommu.c
   +++ b/drivers/iommu/iommu.c
   @@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain
   *domain, struct device *dev)  }
   EXPORT_SYMBOL_GPL(iommu_detach_device);
  
   +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
   + struct iommu_ops *ops = dev-bus-iommu_ops;
   +
   + if (unlikely(ops == NULL || ops-get_dev_iommu_domain == NULL))
   + return NULL;
   +
   + return ops-get_dev_iommu_domain(dev); }
   +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
 
  What prevents this from racing iommu_domain_free()?  There's no
  references acquired, so there's no reason for the caller to assume the 
  pointer
 is valid.
 
 Sorry for late query, somehow this email went into a folder and escaped;
 
 Just to be sure, there is not lock at generic struct iommu_domain, but IP
 specific structure (link FSL domain) linked in iommu_domain-priv have a lock,
 so we need to ensure this race in FSL iommu code (say
 drivers/iommu/fsl_pamu_domain.c), right?

Thinking about this further, there are more problems here:
 - The MSI subsystem will call iommu_get_dev_domain(), which will take a lock,
find the domain pointer, release the lock, and return the domain.
 - Now suppose the domain is freed up
 - while the MSI subsystem is still trying to do work on the domain (like
get_attribute/set_attribute etc.) ???

So can we make iommu_get_dev_domain() return the domain with the lock held,
and have iommu_put_dev_domain() release the lock? And iommu_get_dev_domain()
must always be followed by iommu_put_dev_domain().

Thanks
-Bharat

 
 Thanks
 -Bharat
 
 
/*
 * IOMMU groups are really the natrual working unit of the IOMMU, but
 * the IOMMU API works on domains and devices.  Bridge that gap by
   diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
   7ea319e..fa046bd 100644
   --- a/include/linux/iommu.h
   +++ b/include/linux/iommu.h
   @@ -127,6 +127,7 @@ struct iommu_ops {
 int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
 /* Get the numer of window per domain */
 u32 (*domain_get_windows)(struct iommu_domain *domain);
   + struct iommu_domain *(*get_dev_iommu_domain)(struct device *dev);
  
 unsigned long pgsize_bitmap;
};
   @@ -190,6 +191,7 @@ extern int iommu_domain_window_enable(struct
   iommu_domain
  *domain, u32 wnd_nr,
   phys_addr_t offset, u64 size,
   int prot);
extern void iommu_domain_window_disable(struct iommu_domain
   *domain,
   u32 wnd_nr);
   +extern struct iommu_domain *iommu_get_dev_domain(struct device
   +*dev);
/**
 * report_iommu_fault() - report about an IOMMU fault to the IOMMU
 framework
 * @domain: the iommu domain where the fault has happened @@ -284,6
   +286,11 @@ static inline void iommu_domain_window_disable(struct
   iommu_domain *domain,  {  }
  
   +static inline struct iommu_domain *iommu_get_dev_domain(struct
   +device
   +*dev) {
   + return NULL;
   +}
   +
static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain
   *domain, dma_addr_t iova)  {
 return 0;
 
 
 
  --
  To unsubscribe from this list: send the line unsubscribe linux-pci
  in the body of a message to majord...@vger.kernel.org More majordomo
  info at http://vger.kernel.org/majordomo-info.html



RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-04 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Alex Williamson [mailto:alex.william...@redhat.com]
 Sent: Friday, October 04, 2013 9:15 PM
 To: Bhushan Bharat-R65777
 Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
 linux-
 ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux-
 p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
 foundation.org
 Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
 
 On Fri, 2013-10-04 at 09:54 +, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: linux-pci-ow...@vger.kernel.org
   [mailto:linux-pci-ow...@vger.kernel.org]
   On Behalf Of Alex Williamson
   Sent: Wednesday, September 25, 2013 10:16 PM
   To: Bhushan Bharat-R65777
   Cc: j...@8bytes.org; b...@kernel.crashing.org;
   ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
   linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
   ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org;
   Bhushan Bharat-R65777
   Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
   device
  
   On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
This api return the iommu domain to which the device is attached.
The iommu_domain is required for making API calls related to iommu.
Follow up patches which use this API to know iommu maping.
   
Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
---
 drivers/iommu/iommu.c |   10 ++
 include/linux/iommu.h |7 +++
 2 files changed, 17 insertions(+), 0 deletions(-)
   
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index
fbe9ca7..6ac5f50 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain
*domain, struct device *dev)  }
EXPORT_SYMBOL_GPL(iommu_detach_device);
   
+struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
+   struct iommu_ops *ops = dev-bus-iommu_ops;
+
+   if (unlikely(ops == NULL || ops-get_dev_iommu_domain == NULL))
+   return NULL;
+
+   return ops-get_dev_iommu_domain(dev); }
+EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
  
   What prevents this from racing iommu_domain_free()?  There's no
   references acquired, so there's no reason for the caller to assume the
 pointer is valid.
 
  Sorry for late query, somehow this email went into a folder and
  escaped;
 
  Just to be sure, there is not lock at generic struct iommu_domain, but IP
 specific structure (link FSL domain) linked in iommu_domain-priv have a lock,
 so we need to ensure this race in FSL iommu code (say
 drivers/iommu/fsl_pamu_domain.c), right?
 
 No, it's not sufficient to make sure that your use of the interface is race
 free.  The interface itself needs to be designed so that it's difficult to use
 incorrectly.

So can we define iommu_get_dev_domain()/iommu_put_dev_domain(), where
iommu_get_dev_domain() will return the domain with the lock held, and
iommu_put_dev_domain() will release the lock? And iommu_get_dev_domain() must
always be followed by iommu_put_dev_domain().


 That's not the case here.  This is a backdoor to get the iommu
 domain from the iommu driver regardless of who is using it or how.  The iommu
 domain is created and managed by vfio, so shouldn't we be looking at how to do
 this through vfio?

Let me first describe what we are doing here:
During initialization:
 - vfio talks to the MSI subsystem to learn the MSI page and its size.
 - vfio then interacts with the iommu to map the MSI page in the iommu (the IOVA
is decided by userspace and the physical address is the MSI page).
 - So the IOVA subwindow mapping is created in the iommu and, yes, VFIO knows
about this mapping.

Now on the SET_IRQ(MSI/MSIX) ioctl:
 - vfio calls pci_enable_msix()/pci_enable_msi_block(), which is supposed to set
the MSI address/data in the device.
 - So in the current implementation (this patchset) the MSI subsystem gets the
IOVA from the iommu via this defined interface.
 - Are you saying that rather than getting this from the iommu, we should get it
from vfio? What difference does this make?

Thanks
-Bharat

 It seems like you'd want to use your device to get a vfio
 group reference, from which you could do something with the vfio external user
 interface and get the iommu domain reference.  Thanks,
 
 Alex
 
 /*
  * IOMMU groups are really the natrual working unit of the IOMMU, but
  * the IOMMU API works on domains and devices.  Bridge that gap
by diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7ea319e..fa046bd 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -127,6 +127,7 @@ struct iommu_ops {
int (*domain_set_windows)(struct iommu_domain *domain, u32
 w_count);
/* Get the numer of window per domain */
u32 (*domain_get_windows)(struct iommu_domain *domain);
+   struct iommu_domain *(*get_dev_iommu_domain

RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-04 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Alex Williamson [mailto:alex.william...@redhat.com]
 Sent: Friday, October 04, 2013 10:43 PM
 To: Bhushan Bharat-R65777
 Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
 linux-
 ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux-
 p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
 foundation.org
 Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
 
 On Fri, 2013-10-04 at 16:47 +, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: Alex Williamson [mailto:alex.william...@redhat.com]
   Sent: Friday, October 04, 2013 9:15 PM
   To: Bhushan Bharat-R65777
   Cc: j...@8bytes.org; b...@kernel.crashing.org;
   ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
   linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
   ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org
   Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
   device
  
   On Fri, 2013-10-04 at 09:54 +, Bhushan Bharat-R65777 wrote:
   
 -Original Message-
 From: linux-pci-ow...@vger.kernel.org
 [mailto:linux-pci-ow...@vger.kernel.org]
 On Behalf Of Alex Williamson
 Sent: Wednesday, September 25, 2013 10:16 PM
 To: Bhushan Bharat-R65777
 Cc: j...@8bytes.org; b...@kernel.crashing.org;
 ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
 linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
 ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
 foundation.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
 device

 On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
  This api return the iommu domain to which the device is attached.
  The iommu_domain is required for making API calls related to iommu.
  Follow up patches which use this API to know iommu maping.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
   drivers/iommu/iommu.c |   10 ++
   include/linux/iommu.h |7 +++
   2 files changed, 17 insertions(+), 0 deletions(-)
 
  diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
  index
  fbe9ca7..6ac5f50 100644
  --- a/drivers/iommu/iommu.c
  +++ b/drivers/iommu/iommu.c
  @@ -696,6 +696,16 @@ void iommu_detach_device(struct
  iommu_domain *domain, struct device *dev)  }
  EXPORT_SYMBOL_GPL(iommu_detach_device);
 
  +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
  +   struct iommu_ops *ops = dev-bus-iommu_ops;
  +
  +   if (unlikely(ops == NULL || ops-get_dev_iommu_domain == NULL))
  +   return NULL;
  +
  +   return ops-get_dev_iommu_domain(dev); }
  +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);

 What prevents this from racing iommu_domain_free()?  There's no
 references acquired, so there's no reason for the caller to
 assume the
   pointer is valid.
   
Sorry for late query, somehow this email went into a folder and
escaped;
   
Just to be sure, there is not lock at generic struct
iommu_domain, but IP
   specific structure (link FSL domain) linked in iommu_domain-priv
   have a lock, so we need to ensure this race in FSL iommu code (say
   drivers/iommu/fsl_pamu_domain.c), right?
  
   No, it's not sufficient to make sure that your use of the interface
   is race free.  The interface itself needs to be designed so that
   it's difficult to use incorrectly.
 
  So we can define iommu_get_dev_domain()/iommu_put_dev_domain();
  iommu_get_dev_domain() will return domain with the lock held, and
  iommu_put_dev_domain() will release the lock? And
  iommu_get_dev_domain() must always be followed by
  iommu_get_dev_domain().
 
 What lock?  get/put are generally used for reference counting, not locking in
 the kernel.
 
   That's not the case here.  This is a backdoor to get the iommu
   domain from the iommu driver regardless of who is using it or how.
   The iommu domain is created and managed by vfio, so shouldn't we be
   looking at how to do this through vfio?
 
  Let me first describe what we are doing here:
  During initialization:-
   - vfio talks to MSI system to know the MSI-page and size
   - vfio then interacts with iommu to map the MSI-page in iommu (IOVA
  is decided by userspace and physical address is the MSI-page)
   - So the IOVA subwindow mapping is created in iommu and yes VFIO know about
 this mapping.
 
  Now do SET_IRQ(MSI/MSIX) ioctl:
   - calls pci_enable_msix()/pci_enable_msi_block(): which is supposed to set
 MSI address/data in device.
   - So in current implementation (this patchset) msi-subsystem gets the IOVA
 from iommu via this defined interface.
   - Are you saying that rather than getting this from iommu, we should get 
  this
 from vfio? What difference does this make?
 
 Yes, you just said above

RE: [PATCH 4/6 v5] kvm: powerpc: keep only pte search logic in lookup_linux_pte

2013-10-04 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Friday, October 04, 2013 6:57 PM
 To: Bhushan Bharat-R65777
 Cc: b...@kernel.crashing.org; pau...@samba.org; kvm@vger.kernel.org; kvm-
 p...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; Wood Scott-B07421; 
 Bhushan
 Bharat-R65777
 Subject: Re: [PATCH 4/6 v5] kvm: powerpc: keep only pte search logic in
 lookup_linux_pte
 
 
 On 19.09.2013, at 08:02, Bharat Bhushan wrote:
 
  lookup_linux_pte() was searching for a pte and also sets access flags
  is writable. This function now searches only pte while access flag
  setting is done explicitly.
 
  This pte lookup is not kvm specific, so moved to common code
  (asm/pgtable.h) My Followup patch will use this on booke.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
  v4-v5
  - No change
 
  arch/powerpc/include/asm/pgtable.h  |   24 +++
  arch/powerpc/kvm/book3s_hv_rm_mmu.c |   36 
  +++---
  2 files changed, 36 insertions(+), 24 deletions(-)
 
  diff --git a/arch/powerpc/include/asm/pgtable.h
  b/arch/powerpc/include/asm/pgtable.h
  index 7d6eacf..3a5de5c 100644
  --- a/arch/powerpc/include/asm/pgtable.h
  +++ b/arch/powerpc/include/asm/pgtable.h
  @@ -223,6 +223,30 @@ extern int gup_hugepte(pte_t *ptep, unsigned long
  sz, unsigned long addr, #endif pte_t *find_linux_pte_or_hugepte(pgd_t
  *pgdir, unsigned long ea,
   unsigned *shift);
  +
  +static inline pte_t *lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
  +unsigned long *pte_sizep)
  +{
  +   pte_t *ptep;
  +   unsigned long ps = *pte_sizep;
  +   unsigned int shift;
  +
  +   ptep = find_linux_pte_or_hugepte(pgdir, hva, shift);
  +   if (!ptep)
  +   return __pte(0);
 
 This returns a struct pte_t, but your return value of the function is a struct
 pte_t *. So this code will fail compiling with STRICT_MM_TYPECHECKS set. Any
 reason you don't just return NULL here?

I want to return the ptep (pte pointer), so yes, this should be NULL.
I will correct this.

Thanks
-Bharat

 
 That way callers could simply check on if (ptep) ... or you leave the return
 value as struct pte_t.
 
 
 Alex
 
  +   if (shift)
  +   *pte_sizep = 1ul  shift;
  +   else
  +   *pte_sizep = PAGE_SIZE;
  +
  +   if (ps  *pte_sizep)
  +   return __pte(0);
  +
  +   if (!pte_present(*ptep))
  +   return __pte(0);
 
  +
  +   return ptep;
  +}
  #endif /* __ASSEMBLY__ */
 
  #endif /* __KERNEL__ */
  diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
  b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
  index 45e30d6..74fa7f8 100644
  --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
  +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
  @@ -134,25 +134,6 @@ static void remove_revmap_chain(struct kvm *kvm, long
 pte_index,
  unlock_rmap(rmap);
  }
 
  -static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
  - int writing, unsigned long *pte_sizep)
  -{
  -   pte_t *ptep;
  -   unsigned long ps = *pte_sizep;
  -   unsigned int hugepage_shift;
  -
  -   ptep = find_linux_pte_or_hugepte(pgdir, hva, hugepage_shift);
  -   if (!ptep)
  -   return __pte(0);
  -   if (hugepage_shift)
  -   *pte_sizep = 1ul  hugepage_shift;
  -   else
  -   *pte_sizep = PAGE_SIZE;
  -   if (ps  *pte_sizep)
  -   return __pte(0);
  -   return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
  -}
  -
  static inline void unlock_hpte(unsigned long *hpte, unsigned long
  hpte_v) {
  asm volatile(PPC_RELEASE_BARRIER  : : : memory); @@ -173,6 +154,7
  @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
  unsigned long is_io;
  unsigned long *rmap;
  pte_t pte;
  +   pte_t *ptep;
  unsigned int writing;
  unsigned long mmu_seq;
  unsigned long rcbits;
  @@ -231,8 +213,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned
  long flags,
 
  /* Look up the Linux PTE for the backing page */
  pte_size = psize;
  -   pte = lookup_linux_pte(pgdir, hva, writing, pte_size);
  -   if (pte_present(pte)) {
  +   ptep = lookup_linux_pte(pgdir, hva, pte_size);
  +   if (pte_present(pte_val(*ptep))) {
  +   pte = kvmppc_read_update_linux_pte(ptep, writing);
  if (writing  !pte_write(pte))
  /* make the actual HPTE be read-only */
  ptel = hpte_make_readonly(ptel);
  @@ -661,15 +644,20 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned
 long flags,
  struct kvm_memory_slot *memslot;
  pgd_t *pgdir = vcpu-arch.pgdir;
  pte_t pte;
  +   pte_t *ptep;
 
  psize = hpte_page_size(v, r);
  gfn = ((r  HPTE_R_RPN)  ~(psize - 1))  PAGE_SHIFT;
  memslot

RE: [PATCH 1/3 v6] kvm: powerpc: keep only pte search logic in lookup_linux_pte

2013-10-04 Thread Bhushan Bharat-R65777
Adding Paul

 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Friday, October 04, 2013 8:49 PM
 To: Bhushan Bharat-R65777
 Cc: kvm@vger.kernel.org; kvm-...@vger.kernel.org; Wood Scott-B07421;
 b...@kernel.crashing.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 1/3 v6] kvm: powerpc: keep only pte search logic in
 lookup_linux_pte
 
 
 On 04.10.2013, at 16:55, Bharat Bhushan wrote:
 
  lookup_linux_pte() was searching for a pte and also sets access flags
  is writable. This function now searches only pte while access flag
  setting is done explicitly.
 
  This pte lookup is not kvm specific, so moved to common code
  (asm/pgtable.h) My Followup patch will use this on booke.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
 
 Paul, please ack.
 
 
 Alex
 
  ---
  v5-v6
  - return NULL rather than _pte(0) as this was
giving compilation error with STRICT_MM_TYPECHECKS
  - Also now only check for a NULL pointer in the caller rather than
calling pte_present() twice
 
  arch/powerpc/include/asm/pgtable.h  |   24 +++
  arch/powerpc/kvm/book3s_hv_rm_mmu.c |   36 
  +++---
  2 files changed, 36 insertions(+), 24 deletions(-)
 
  diff --git a/arch/powerpc/include/asm/pgtable.h
  b/arch/powerpc/include/asm/pgtable.h
  index 7d6eacf..5e41a31 100644
  --- a/arch/powerpc/include/asm/pgtable.h
  +++ b/arch/powerpc/include/asm/pgtable.h
  @@ -223,6 +223,30 @@ extern int gup_hugepte(pte_t *ptep, unsigned long
  sz, unsigned long addr, #endif pte_t *find_linux_pte_or_hugepte(pgd_t
  *pgdir, unsigned long ea,
   unsigned *shift);
  +
  +static inline pte_t *lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
  +unsigned long *pte_sizep)
  +{
  +   pte_t *ptep;
  +   unsigned long ps = *pte_sizep;
  +   unsigned int shift;
  +
  +   ptep = find_linux_pte_or_hugepte(pgdir, hva, shift);
  +   if (!ptep)
  +   return NULL;
  +   if (shift)
  +   *pte_sizep = 1ul  shift;
  +   else
  +   *pte_sizep = PAGE_SIZE;
  +
  +   if (ps  *pte_sizep)
  +   return NULL;
  +
  +   if (!pte_present(*ptep))
  +   return NULL;
  +
  +   return ptep;
  +}
  #endif /* __ASSEMBLY__ */
 
  #endif /* __KERNEL__ */
  diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
  b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
  index 45e30d6..8ab54e8 100644
  --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
  +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
  @@ -134,25 +134,6 @@ static void remove_revmap_chain(struct kvm *kvm, long
 pte_index,
  unlock_rmap(rmap);
  }
 
  -static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
  - int writing, unsigned long *pte_sizep)
  -{
  -   pte_t *ptep;
  -   unsigned long ps = *pte_sizep;
  -   unsigned int hugepage_shift;
  -
  -   ptep = find_linux_pte_or_hugepte(pgdir, hva, hugepage_shift);
  -   if (!ptep)
  -   return __pte(0);
  -   if (hugepage_shift)
  -   *pte_sizep = 1ul  hugepage_shift;
  -   else
  -   *pte_sizep = PAGE_SIZE;
  -   if (ps  *pte_sizep)
  -   return __pte(0);
  -   return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
  -}
  -
  static inline void unlock_hpte(unsigned long *hpte, unsigned long
  hpte_v) {
  asm volatile(PPC_RELEASE_BARRIER  : : : memory); @@ -173,6 +154,7
  @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
  unsigned long is_io;
  unsigned long *rmap;
  pte_t pte;
  +   pte_t *ptep;
  unsigned int writing;
  unsigned long mmu_seq;
  unsigned long rcbits;
  @@ -231,8 +213,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned
  long flags,
 
  /* Look up the Linux PTE for the backing page */
  pte_size = psize;
  -   pte = lookup_linux_pte(pgdir, hva, writing, pte_size);
  -   if (pte_present(pte)) {
  +   ptep = lookup_linux_pte(pgdir, hva, pte_size);
  +   if (ptep) {
  +   pte = kvmppc_read_update_linux_pte(ptep, writing);
  if (writing  !pte_write(pte))
  /* make the actual HPTE be read-only */
  ptel = hpte_make_readonly(ptel);
  @@ -661,15 +644,20 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned
 long flags,
  struct kvm_memory_slot *memslot;
  pgd_t *pgdir = vcpu-arch.pgdir;
  pte_t pte;
  +   pte_t *ptep;
 
  psize = hpte_page_size(v, r);
  gfn = ((r  HPTE_R_RPN)  ~(psize - 1))  PAGE_SHIFT;
  memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
  if (memslot) {
  hva = __gfn_to_hva_memslot(memslot, gfn);
  -   pte = lookup_linux_pte(pgdir, hva, 1, psize);
  -   if (pte_present(pte)  !pte_write(pte

RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-04 Thread Bhushan Bharat-R65777


 -Original Message-
 From: linux-pci-ow...@vger.kernel.org [mailto:linux-pci-ow...@vger.kernel.org]
 On Behalf Of Alex Williamson
 Sent: Wednesday, September 25, 2013 10:16 PM
 To: Bhushan Bharat-R65777
 Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
 linux-
 ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux-
 p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
 foundation.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
 
 On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
  This api return the iommu domain to which the device is attached.
  The iommu_domain is required for making API calls related to iommu.
  Follow up patches which use this API to know iommu maping.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
   drivers/iommu/iommu.c |   10 ++
   include/linux/iommu.h |7 +++
   2 files changed, 17 insertions(+), 0 deletions(-)
 
  diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index
  fbe9ca7..6ac5f50 100644
  --- a/drivers/iommu/iommu.c
  +++ b/drivers/iommu/iommu.c
  @@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain
  *domain, struct device *dev)  }
  EXPORT_SYMBOL_GPL(iommu_detach_device);
 
  +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
  +   struct iommu_ops *ops = dev-bus-iommu_ops;
  +
  +   if (unlikely(ops == NULL || ops-get_dev_iommu_domain == NULL))
  +   return NULL;
  +
  +   return ops-get_dev_iommu_domain(dev); }
  +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
 
 What prevents this from racing iommu_domain_free()?  There's no references
 acquired, so there's no reason for the caller to assume the pointer is valid.

Sorry for the late query; somehow this email went into a folder and escaped my notice.

Just to be sure: there is no lock in the generic struct iommu_domain, but the
IP-specific structure (like the FSL domain) linked in iommu_domain->priv has a
lock, so we need to guard against this race in the FSL iommu code (say
drivers/iommu/fsl_pamu_domain.c), right?

Thanks
-Bharat

 
   /*
* IOMMU groups are really the natrual working unit of the IOMMU, but
* the IOMMU API works on domains and devices.  Bridge that gap by
  diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
  7ea319e..fa046bd 100644
  --- a/include/linux/iommu.h
  +++ b/include/linux/iommu.h
  @@ -127,6 +127,7 @@ struct iommu_ops {
  int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
  /* Get the numer of window per domain */
  u32 (*domain_get_windows)(struct iommu_domain *domain);
  +   struct iommu_domain *(*get_dev_iommu_domain)(struct device *dev);
 
  unsigned long pgsize_bitmap;
   };
  @@ -190,6 +191,7 @@ extern int iommu_domain_window_enable(struct 
  iommu_domain
 *domain, u32 wnd_nr,
phys_addr_t offset, u64 size,
int prot);
   extern void iommu_domain_window_disable(struct iommu_domain *domain,
  u32 wnd_nr);
  +extern struct iommu_domain *iommu_get_dev_domain(struct device *dev);
   /**
* report_iommu_fault() - report about an IOMMU fault to the IOMMU 
  framework
* @domain: the iommu domain where the fault has happened @@ -284,6
  +286,11 @@ static inline void iommu_domain_window_disable(struct
  iommu_domain *domain,  {  }
 
  +static inline struct iommu_domain *iommu_get_dev_domain(struct device
  +*dev) {
  +   return NULL;
  +}
  +
   static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain
  *domain, dma_addr_t iova)  {
  return 0;
 
 
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-pci in the 
 body
 of a message to majord...@vger.kernel.org More majordomo info at
 http://vger.kernel.org/majordomo-info.html

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-04 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Bhushan Bharat-R65777
 Sent: Friday, October 04, 2013 3:24 PM
 To: 'Alex Williamson'
 Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
 linux-
 ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux-
 p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
 foundation.org
 Subject: RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device
 
 
 
  -Original Message-
  From: linux-pci-ow...@vger.kernel.org
  [mailto:linux-pci-ow...@vger.kernel.org]
  On Behalf Of Alex Williamson
  Sent: Wednesday, September 25, 2013 10:16 PM
  To: Bhushan Bharat-R65777
  Cc: j...@8bytes.org; b...@kernel.crashing.org;
  ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
  linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
  ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org;
  Bhushan Bharat-R65777
  Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
  device
 
  On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
   This api return the iommu domain to which the device is attached.
   The iommu_domain is required for making API calls related to iommu.
   Follow up patches which use this API to know iommu maping.
  
   Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
   ---
drivers/iommu/iommu.c |   10 ++
include/linux/iommu.h |7 +++
2 files changed, 17 insertions(+), 0 deletions(-)
  
   diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index
   fbe9ca7..6ac5f50 100644
   --- a/drivers/iommu/iommu.c
   +++ b/drivers/iommu/iommu.c
   @@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain
   *domain, struct device *dev)  }
   EXPORT_SYMBOL_GPL(iommu_detach_device);
  
   +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
   + struct iommu_ops *ops = dev-bus-iommu_ops;
   +
   + if (unlikely(ops == NULL || ops-get_dev_iommu_domain == NULL))
   + return NULL;
   +
   + return ops-get_dev_iommu_domain(dev); }
   +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
 
  What prevents this from racing iommu_domain_free()?  There's no
  references acquired, so there's no reason for the caller to assume the 
  pointer
 is valid.
 
 Sorry for late query, somehow this email went into a folder and escaped;
 
 Just to be sure, there is not lock at generic struct iommu_domain, but IP
 specific structure (link FSL domain) linked in iommu_domain-priv have a lock,
 so we need to ensure this race in FSL iommu code (say
 drivers/iommu/fsl_pamu_domain.c), right?

Thinking about this further, there are more problems here:
 - The MSI subsystem will call iommu_get_dev_domain(), which will take a lock,
find the domain pointer, release the lock, and return the domain.
 - Now suppose the domain is freed up
 - while the MSI subsystem is still trying to do work on the domain (like
get_attribute/set_attribute etc.) ???

So can we make iommu_get_dev_domain() return the domain with the lock held,
and have iommu_put_dev_domain() release the lock? And iommu_get_dev_domain()
must always be followed by iommu_put_dev_domain().

Thanks
-Bharat

 
 Thanks
 -Bharat
 
 
/*
 * IOMMU groups are really the natrual working unit of the IOMMU, but
 * the IOMMU API works on domains and devices.  Bridge that gap by
   diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
   7ea319e..fa046bd 100644
   --- a/include/linux/iommu.h
   +++ b/include/linux/iommu.h
   @@ -127,6 +127,7 @@ struct iommu_ops {
 int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
 /* Get the numer of window per domain */
 u32 (*domain_get_windows)(struct iommu_domain *domain);
   + struct iommu_domain *(*get_dev_iommu_domain)(struct device *dev);
  
 unsigned long pgsize_bitmap;
};
   @@ -190,6 +191,7 @@ extern int iommu_domain_window_enable(struct
   iommu_domain
  *domain, u32 wnd_nr,
   phys_addr_t offset, u64 size,
   int prot);
extern void iommu_domain_window_disable(struct iommu_domain
   *domain,
   u32 wnd_nr);
   +extern struct iommu_domain *iommu_get_dev_domain(struct device
   +*dev);
/**
 * report_iommu_fault() - report about an IOMMU fault to the IOMMU
 framework
 * @domain: the iommu domain where the fault has happened @@ -284,6
   +286,11 @@ static inline void iommu_domain_window_disable(struct
   iommu_domain *domain,  {  }
  
   +static inline struct iommu_domain *iommu_get_dev_domain(struct
   +device
   +*dev) {
   + return NULL;
   +}
   +
static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain
   *domain, dma_addr_t iova)  {
 return 0;
 
 
 
  --
  To unsubscribe from this list: send the line unsubscribe linux-pci
  in the body of a message to majord...@vger.kernel.org More majordomo
  info at http://vger.kernel.org/majordomo-info.html

___
iommu mailing list
iommu@lists.linux

RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-04 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Alex Williamson [mailto:alex.william...@redhat.com]
 Sent: Friday, October 04, 2013 9:15 PM
 To: Bhushan Bharat-R65777
 Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
 linux-
 ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux-
 p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
 foundation.org
 Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
 
 On Fri, 2013-10-04 at 09:54 +, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: linux-pci-ow...@vger.kernel.org
   [mailto:linux-pci-ow...@vger.kernel.org]
   On Behalf Of Alex Williamson
   Sent: Wednesday, September 25, 2013 10:16 PM
   To: Bhushan Bharat-R65777
   Cc: j...@8bytes.org; b...@kernel.crashing.org;
   ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
   linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
   ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org;
   Bhushan Bharat-R65777
   Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
   device
  
   On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
This api return the iommu domain to which the device is attached.
The iommu_domain is required for making API calls related to iommu.
Follow up patches which use this API to know iommu maping.
   
Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
---
 drivers/iommu/iommu.c |   10 ++
 include/linux/iommu.h |7 +++
 2 files changed, 17 insertions(+), 0 deletions(-)
   
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index
fbe9ca7..6ac5f50 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain
*domain, struct device *dev)  }
EXPORT_SYMBOL_GPL(iommu_detach_device);
   
+struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
+   struct iommu_ops *ops = dev-bus-iommu_ops;
+
+   if (unlikely(ops == NULL || ops-get_dev_iommu_domain == NULL))
+   return NULL;
+
+   return ops-get_dev_iommu_domain(dev); }
+EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
  
   What prevents this from racing iommu_domain_free()?  There's no
   references acquired, so there's no reason for the caller to assume the
 pointer is valid.
 
  Sorry for late query, somehow this email went into a folder and
  escaped;
 
  Just to be sure, there is not lock at generic struct iommu_domain, but IP
 specific structure (link FSL domain) linked in iommu_domain-priv have a lock,
 so we need to ensure this race in FSL iommu code (say
 drivers/iommu/fsl_pamu_domain.c), right?
 
 No, it's not sufficient to make sure that your use of the interface is race
 free.  The interface itself needs to be designed so that it's difficult to use
 incorrectly.

So can we define iommu_get_dev_domain()/iommu_put_dev_domain(), where 
iommu_get_dev_domain() will return the domain with the lock held, and 
iommu_put_dev_domain() will release the lock? Then iommu_get_dev_domain() must 
always be followed by iommu_put_dev_domain().


 That's not the case here.  This is a backdoor to get the iommu
 domain from the iommu driver regardless of who is using it or how.  The iommu
 domain is created and managed by vfio, so shouldn't we be looking at how to do
 this through vfio?

Let me first describe what we are doing here:
During initialization:-
 - vfio talks to MSI system to know the MSI-page and size
 - vfio then interacts with iommu to map the MSI-page in iommu (IOVA is decided 
by userspace and physical address is the MSI-page)
 - So the IOVA subwindow mapping is created in iommu and yes, VFIO knows about 
this mapping.

Now do SET_IRQ(MSI/MSIX) ioctl:
 - calls pci_enable_msix()/pci_enable_msi_block(): which is supposed to set MSI 
address/data in device.
 - So in current implementation (this patchset) msi-subsystem gets the IOVA 
from iommu via this defined interface.
 - Are you saying that rather than getting this from iommu, we should get this 
from vfio? What difference does this make?

Thanks
-Bharat

 It seems like you'd want to use your device to get a vfio
 group reference, from which you could do something with the vfio external user
 interface and get the iommu domain reference.  Thanks,
 
 Alex
 
 /*
  * IOMMU groups are really the natrual working unit of the IOMMU, but
  * the IOMMU API works on domains and devices.  Bridge that gap
by diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7ea319e..fa046bd 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -127,6 +127,7 @@ struct iommu_ops {
int (*domain_set_windows)(struct iommu_domain *domain, u32
 w_count);
/* Get the numer of window per domain */
u32 (*domain_get_windows)(struct iommu_domain *domain);
+   struct iommu_domain *(*get_dev_iommu_domain

RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-04 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Alex Williamson [mailto:alex.william...@redhat.com]
 Sent: Friday, October 04, 2013 10:43 PM
 To: Bhushan Bharat-R65777
 Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
 linux-
 ker...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; linux-
 p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
 foundation.org
 Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
 
 On Fri, 2013-10-04 at 16:47 +, Bhushan Bharat-R65777 wrote:
 
   -Original Message-
   From: Alex Williamson [mailto:alex.william...@redhat.com]
   Sent: Friday, October 04, 2013 9:15 PM
   To: Bhushan Bharat-R65777
   Cc: j...@8bytes.org; b...@kernel.crashing.org;
   ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
   linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
   ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org
   Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
   device
  
   On Fri, 2013-10-04 at 09:54 +, Bhushan Bharat-R65777 wrote:
   
 -Original Message-
 From: linux-pci-ow...@vger.kernel.org
 [mailto:linux-pci-ow...@vger.kernel.org]
 On Behalf Of Alex Williamson
 Sent: Wednesday, September 25, 2013 10:16 PM
 To: Bhushan Bharat-R65777
 Cc: j...@8bytes.org; b...@kernel.crashing.org;
 ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
 linuxppc-...@lists.ozlabs.org; linux- p...@vger.kernel.org;
 ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
 foundation.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
 device

 On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
  This api return the iommu domain to which the device is attached.
  The iommu_domain is required for making API calls related to iommu.
  Follow up patches which use this API to know iommu maping.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
   drivers/iommu/iommu.c |   10 ++
   include/linux/iommu.h |7 +++
   2 files changed, 17 insertions(+), 0 deletions(-)
 
  diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
  index
  fbe9ca7..6ac5f50 100644
  --- a/drivers/iommu/iommu.c
  +++ b/drivers/iommu/iommu.c
  @@ -696,6 +696,16 @@ void iommu_detach_device(struct
  iommu_domain *domain, struct device *dev)  }
  EXPORT_SYMBOL_GPL(iommu_detach_device);
 
  +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
  +   struct iommu_ops *ops = dev-bus-iommu_ops;
  +
  +   if (unlikely(ops == NULL || ops-get_dev_iommu_domain == NULL))
  +   return NULL;
  +
  +   return ops-get_dev_iommu_domain(dev); }
  +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);

 What prevents this from racing iommu_domain_free()?  There's no
 references acquired, so there's no reason for the caller to
 assume the
   pointer is valid.
   
Sorry for late query, somehow this email went into a folder and
escaped;
   
Just to be sure, there is not lock at generic struct
iommu_domain, but IP
   specific structure (link FSL domain) linked in iommu_domain-priv
   have a lock, so we need to ensure this race in FSL iommu code (say
   drivers/iommu/fsl_pamu_domain.c), right?
  
   No, it's not sufficient to make sure that your use of the interface
   is race free.  The interface itself needs to be designed so that
   it's difficult to use incorrectly.
 
  So we can define iommu_get_dev_domain()/iommu_put_dev_domain();
  iommu_get_dev_domain() will return domain with the lock held, and
  iommu_put_dev_domain() will release the lock? And
  iommu_get_dev_domain() must always be followed by
  iommu_get_dev_domain().
 
 What lock?  get/put are generally used for reference counting, not locking in
 the kernel.
 
   That's not the case here.  This is a backdoor to get the iommu
   domain from the iommu driver regardless of who is using it or how.
   The iommu domain is created and managed by vfio, so shouldn't we be
   looking at how to do this through vfio?
 
  Let me first describe what we are doing here:
  During initialization:-
   - vfio talks to MSI system to know the MSI-page and size
   - vfio then interacts with iommu to map the MSI-page in iommu (IOVA
  is decided by userspace and physical address is the MSI-page)
   - So the IOVA subwindow mapping is created in iommu and yes VFIO know about
 this mapping.
 
  Now do SET_IRQ(MSI/MSIX) ioctl:
   - calls pci_enable_msix()/pci_enable_msi_block(): which is supposed to set
 MSI address/data in device.
   - So in current implementation (this patchset) msi-subsystem gets the IOVA
 from iommu via this defined interface.
   - Are you saying that rather than getting this from iommu, we should get 
  this
 from vfio? What difference does this make?
 
 Yes, you just said above

RE: [PATCH 4/6 v5] kvm: powerpc: keep only pte search logic in lookup_linux_pte

2013-10-04 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Alexander Graf [mailto:ag...@suse.de]
 Sent: Friday, October 04, 2013 6:57 PM
 To: Bhushan Bharat-R65777
 Cc: b...@kernel.crashing.org; pau...@samba.org; k...@vger.kernel.org; kvm-
 p...@vger.kernel.org; linuxppc-...@lists.ozlabs.org; Wood Scott-B07421; 
 Bhushan
 Bharat-R65777
 Subject: Re: [PATCH 4/6 v5] kvm: powerpc: keep only pte search logic in
 lookup_linux_pte
 
 
 On 19.09.2013, at 08:02, Bharat Bhushan wrote:
 
  lookup_linux_pte() was searching for a pte and also sets access flags
  is writable. This function now searches only pte while access flag
  setting is done explicitly.
 
  This pte lookup is not kvm specific, so moved to common code
  (asm/pgtable.h) My Followup patch will use this on booke.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
  v4-v5
  - No change
 
  arch/powerpc/include/asm/pgtable.h  |   24 +++
  arch/powerpc/kvm/book3s_hv_rm_mmu.c |   36 
  +++---
  2 files changed, 36 insertions(+), 24 deletions(-)
 
  diff --git a/arch/powerpc/include/asm/pgtable.h
  b/arch/powerpc/include/asm/pgtable.h
  index 7d6eacf..3a5de5c 100644
  --- a/arch/powerpc/include/asm/pgtable.h
  +++ b/arch/powerpc/include/asm/pgtable.h
  @@ -223,6 +223,30 @@ extern int gup_hugepte(pte_t *ptep, unsigned long
  sz, unsigned long addr, #endif pte_t *find_linux_pte_or_hugepte(pgd_t
  *pgdir, unsigned long ea,
   unsigned *shift);
  +
  +static inline pte_t *lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
  +unsigned long *pte_sizep)
  +{
  +   pte_t *ptep;
  +   unsigned long ps = *pte_sizep;
  +   unsigned int shift;
  +
  +   ptep = find_linux_pte_or_hugepte(pgdir, hva, shift);
  +   if (!ptep)
  +   return __pte(0);
 
 This returns a struct pte_t, but your return value of the function is a struct
 pte_t *. So this code will fail compiling with STRICT_MM_TYPECHECKS set. Any
 reason you don't just return NULL here?

I want to return the ptep (pte pointer) , so yes this should be NULL.
Will correct this.

Thanks
-Bharat

 
 That way callers could simply check on if (ptep) ... or you leave the return
 value as struct pte_t.
 
 
 Alex
 
  +   if (shift)
  +   *pte_sizep = 1ul  shift;
  +   else
  +   *pte_sizep = PAGE_SIZE;
  +
  +   if (ps  *pte_sizep)
  +   return __pte(0);
  +
  +   if (!pte_present(*ptep))
  +   return __pte(0);
 
  +
  +   return ptep;
  +}
  #endif /* __ASSEMBLY__ */
 
  #endif /* __KERNEL__ */
  diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
  b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
  index 45e30d6..74fa7f8 100644
  --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
  +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
  @@ -134,25 +134,6 @@ static void remove_revmap_chain(struct kvm *kvm, long
 pte_index,
  unlock_rmap(rmap);
  }
 
  -static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
  - int writing, unsigned long *pte_sizep)
  -{
  -   pte_t *ptep;
  -   unsigned long ps = *pte_sizep;
  -   unsigned int hugepage_shift;
  -
  -   ptep = find_linux_pte_or_hugepte(pgdir, hva, hugepage_shift);
  -   if (!ptep)
  -   return __pte(0);
  -   if (hugepage_shift)
  -   *pte_sizep = 1ul  hugepage_shift;
  -   else
  -   *pte_sizep = PAGE_SIZE;
  -   if (ps  *pte_sizep)
  -   return __pte(0);
  -   return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
  -}
  -
  static inline void unlock_hpte(unsigned long *hpte, unsigned long
  hpte_v) {
  asm volatile(PPC_RELEASE_BARRIER  : : : memory); @@ -173,6 +154,7
  @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
  unsigned long is_io;
  unsigned long *rmap;
  pte_t pte;
  +   pte_t *ptep;
  unsigned int writing;
  unsigned long mmu_seq;
  unsigned long rcbits;
  @@ -231,8 +213,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned
  long flags,
 
  /* Look up the Linux PTE for the backing page */
  pte_size = psize;
  -   pte = lookup_linux_pte(pgdir, hva, writing, pte_size);
  -   if (pte_present(pte)) {
  +   ptep = lookup_linux_pte(pgdir, hva, pte_size);
  +   if (pte_present(pte_val(*ptep))) {
  +   pte = kvmppc_read_update_linux_pte(ptep, writing);
  if (writing  !pte_write(pte))
  /* make the actual HPTE be read-only */
  ptel = hpte_make_readonly(ptel);
  @@ -661,15 +644,20 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned
 long flags,
  struct kvm_memory_slot *memslot;
  pgd_t *pgdir = vcpu-arch.pgdir;
  pte_t pte;
  +   pte_t *ptep;
 
  psize = hpte_page_size(v, r);
  gfn = ((r  HPTE_R_RPN)  ~(psize - 1))  PAGE_SHIFT;
  memslot

RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-04 Thread Bhushan Bharat-R65777


 -Original Message-
 From: linux-pci-ow...@vger.kernel.org [mailto:linux-pci-ow...@vger.kernel.org]
 On Behalf Of Alex Williamson
 Sent: Wednesday, September 25, 2013 10:16 PM
 To: Bhushan Bharat-R65777
 Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
 linux-
 ker...@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; linux-
 p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
 foundation.org; Bhushan Bharat-R65777
 Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a device
 
 On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
  This api return the iommu domain to which the device is attached.
  The iommu_domain is required for making API calls related to iommu.
  Follow up patches which use this API to know iommu maping.
 
  Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
  ---
   drivers/iommu/iommu.c |   10 ++
   include/linux/iommu.h |7 +++
   2 files changed, 17 insertions(+), 0 deletions(-)
 
  diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index
  fbe9ca7..6ac5f50 100644
  --- a/drivers/iommu/iommu.c
  +++ b/drivers/iommu/iommu.c
  @@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain
  *domain, struct device *dev)  }
  EXPORT_SYMBOL_GPL(iommu_detach_device);
 
  +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
  +   struct iommu_ops *ops = dev-bus-iommu_ops;
  +
  +   if (unlikely(ops == NULL || ops-get_dev_iommu_domain == NULL))
  +   return NULL;
  +
  +   return ops-get_dev_iommu_domain(dev); }
  +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
 
 What prevents this from racing iommu_domain_free()?  There's no references
 acquired, so there's no reason for the caller to assume the pointer is valid.

Sorry for late query, somehow this email went into a folder and escaped;

Just to be sure: there is no lock in the generic struct iommu_domain, but the 
IP-specific structure (like the FSL domain) linked via iommu_domain->priv has a 
lock, so we need to handle this race in the FSL iommu code (say 
drivers/iommu/fsl_pamu_domain.c), right?

Thanks
-Bharat

 
   /*
* IOMMU groups are really the natrual working unit of the IOMMU, but
* the IOMMU API works on domains and devices.  Bridge that gap by
  diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
  7ea319e..fa046bd 100644
  --- a/include/linux/iommu.h
  +++ b/include/linux/iommu.h
  @@ -127,6 +127,7 @@ struct iommu_ops {
  int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
  /* Get the numer of window per domain */
  u32 (*domain_get_windows)(struct iommu_domain *domain);
  +   struct iommu_domain *(*get_dev_iommu_domain)(struct device *dev);
 
  unsigned long pgsize_bitmap;
   };
  @@ -190,6 +191,7 @@ extern int iommu_domain_window_enable(struct 
  iommu_domain
 *domain, u32 wnd_nr,
phys_addr_t offset, u64 size,
int prot);
   extern void iommu_domain_window_disable(struct iommu_domain *domain,
  u32 wnd_nr);
  +extern struct iommu_domain *iommu_get_dev_domain(struct device *dev);
   /**
* report_iommu_fault() - report about an IOMMU fault to the IOMMU 
  framework
* @domain: the iommu domain where the fault has happened @@ -284,6
  +286,11 @@ static inline void iommu_domain_window_disable(struct
  iommu_domain *domain,  {  }
 
  +static inline struct iommu_domain *iommu_get_dev_domain(struct device
  +*dev) {
  +   return NULL;
  +}
  +
   static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain
  *domain, dma_addr_t iova)  {
  return 0;
 
 
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-pci in the 
 body
 of a message to majord...@vger.kernel.org More majordomo info at
 http://vger.kernel.org/majordomo-info.html

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device

2013-10-04 Thread Bhushan Bharat-R65777


 -Original Message-
 From: Bhushan Bharat-R65777
 Sent: Friday, October 04, 2013 3:24 PM
 To: 'Alex Williamson'
 Cc: j...@8bytes.org; b...@kernel.crashing.org; ga...@kernel.crashing.org; 
 linux-
 ker...@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; linux-
 p...@vger.kernel.org; ag...@suse.de; Wood Scott-B07421; iommu@lists.linux-
 foundation.org
 Subject: RE: [PATCH 2/7] iommu: add api to get iommu_domain of a device
 
 
 
  -Original Message-
  From: linux-pci-ow...@vger.kernel.org
  [mailto:linux-pci-ow...@vger.kernel.org]
  On Behalf Of Alex Williamson
  Sent: Wednesday, September 25, 2013 10:16 PM
  To: Bhushan Bharat-R65777
  Cc: j...@8bytes.org; b...@kernel.crashing.org;
  ga...@kernel.crashing.org; linux- ker...@vger.kernel.org;
  linuxppc-dev@lists.ozlabs.org; linux- p...@vger.kernel.org;
  ag...@suse.de; Wood Scott-B07421; iommu@lists.linux- foundation.org;
  Bhushan Bharat-R65777
  Subject: Re: [PATCH 2/7] iommu: add api to get iommu_domain of a
  device
 
  On Thu, 2013-09-19 at 12:59 +0530, Bharat Bhushan wrote:
   This api return the iommu domain to which the device is attached.
   The iommu_domain is required for making API calls related to iommu.
   Follow up patches which use this API to know iommu maping.
  
   Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com
   ---
drivers/iommu/iommu.c |   10 ++
include/linux/iommu.h |7 +++
2 files changed, 17 insertions(+), 0 deletions(-)
  
   diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index
   fbe9ca7..6ac5f50 100644
   --- a/drivers/iommu/iommu.c
   +++ b/drivers/iommu/iommu.c
   @@ -696,6 +696,16 @@ void iommu_detach_device(struct iommu_domain
   *domain, struct device *dev)  }
   EXPORT_SYMBOL_GPL(iommu_detach_device);
  
   +struct iommu_domain *iommu_get_dev_domain(struct device *dev) {
   + struct iommu_ops *ops = dev-bus-iommu_ops;
   +
   + if (unlikely(ops == NULL || ops-get_dev_iommu_domain == NULL))
   + return NULL;
   +
   + return ops-get_dev_iommu_domain(dev); }
   +EXPORT_SYMBOL_GPL(iommu_get_dev_domain);
 
  What prevents this from racing iommu_domain_free()?  There's no
  references acquired, so there's no reason for the caller to assume the 
  pointer
 is valid.
 
 Sorry for late query, somehow this email went into a folder and escaped;
 
 Just to be sure, there is not lock at generic struct iommu_domain, but IP
 specific structure (link FSL domain) linked in iommu_domain-priv have a lock,
 so we need to ensure this race in FSL iommu code (say
 drivers/iommu/fsl_pamu_domain.c), right?

Further thinking of this, there are more problems here:
 - Like MSI subsystem will call iommu_get_dev_domain(), which will take a lock, 
find the domain pointer, release the lock, and return the domain
 - Now if the domain is freed up
 - While MSI subsystem tries to do work on domain (like 
get_attribute/set_attribute etc) ???

So can we make iommu_get_dev_domain() return the domain with the lock held, 
and have iommu_put_dev_domain() release the lock? Then iommu_get_dev_domain() 
must always be followed by iommu_put_dev_domain().

Thanks
-Bharat

 
 Thanks
 -Bharat
 
 
/*
 * IOMMU groups are really the natrual working unit of the IOMMU, but
 * the IOMMU API works on domains and devices.  Bridge that gap by
   diff --git a/include/linux/iommu.h b/include/linux/iommu.h index
   7ea319e..fa046bd 100644
   --- a/include/linux/iommu.h
   +++ b/include/linux/iommu.h
   @@ -127,6 +127,7 @@ struct iommu_ops {
 int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
 /* Get the numer of window per domain */
 u32 (*domain_get_windows)(struct iommu_domain *domain);
   + struct iommu_domain *(*get_dev_iommu_domain)(struct device *dev);
  
 unsigned long pgsize_bitmap;
};
   @@ -190,6 +191,7 @@ extern int iommu_domain_window_enable(struct
   iommu_domain
  *domain, u32 wnd_nr,
   phys_addr_t offset, u64 size,
   int prot);
extern void iommu_domain_window_disable(struct iommu_domain
   *domain,
   u32 wnd_nr);
   +extern struct iommu_domain *iommu_get_dev_domain(struct device
   +*dev);
/**
 * report_iommu_fault() - report about an IOMMU fault to the IOMMU
 framework
 * @domain: the iommu domain where the fault has happened @@ -284,6
   +286,11 @@ static inline void iommu_domain_window_disable(struct
   iommu_domain *domain,  {  }
  
   +static inline struct iommu_domain *iommu_get_dev_domain(struct
   +device
   +*dev) {
   + return NULL;
   +}
   +
static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain
   *domain, dma_addr_t iova)  {
 return 0;
 
 
 
  --
  To unsubscribe from this list: send the line unsubscribe linux-pci
  in the body of a message to majord...@vger.kernel.org More majordomo
  info at http://vger.kernel.org/majordomo-info.html

___
Linuxppc-dev mailing list
Linuxppc-dev

  1   2   3   4   5   6   7   >