Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue

2016-06-28 Thread Jan Beulich
>>> On 28.06.16 at 09:06,  wrote:
> On June 27, 2016 11:21 PM, Jan Beulich  wrote:
>> >>> On 27.06.16 at 14:56,  wrote:
>> > On June 27, 2016 4:24 PM, Jan Beulich  wrote:
>> >> >>> On 24.06.16 at 07:51,  wrote:
>> >> > @@ -199,24 +199,73 @@ static int __must_check
>> >> queue_invalidate_wait(struct iommu *iommu,
>> >> >  return -EOPNOTSUPP;
>> >> >  }
>> >> >
>> >> > -static int __must_check invalidate_sync(struct iommu *iommu,
>> >> > -bool_t flush_dev_iotlb)
>> >> > +static int __must_check invalidate_sync(struct iommu *iommu)
>> >> >  {
>> >> >  struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
>> >> >
>> >> >  ASSERT(qi_ctrl->qinval_maddr);
>> >> >
>> >> > -return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb);
>> >> > +return queue_invalidate_wait(iommu, 0, 1, 1, 0); }
>> >> > +
>> >> > +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did,
>> >> > + struct pci_dev *pdev) {
>> >> > +struct domain *d = NULL;
>> >> > +
>> >> > +if ( test_bit(did, iommu->domid_bitmap) )
>> >> > +d = rcu_lock_domain_by_id(iommu->domid_map[did]);
>> >> > +
>> >> > +/*
>> >> > + * In case the domain has been freed or the IOMMU domid bitmap is
>> >> > + * not valid, the device no longer belongs to this domain.
>> >> > + */
>> >> > +if ( d == NULL )
>> >> > +return;
>> >> > +
>> >> > +pcidevs_lock();
>> >> > +ASSERT(pdev->domain);
>> >> > +list_del(&pdev->domain_list);
>> >> > +pdev->domain = NULL;
>> >> > +pci_hide_existing_device(pdev);
>> >> > +pcidevs_unlock();
>> >> > +
>> >> > +if ( !d->is_shutting_down && printk_ratelimit() )
>> >> > +printk(XENLOG_WARNING VTDPREFIX
>> >> > +   " dom%d: ATS device %04x:%02x:%02x.%u flush failed\n",
>> >> > +   d->domain_id, pdev->seg, pdev->bus,
>> >> > +   PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
>> >> > +
>> >> > +if ( !is_hardware_domain(d) )
>> >> > +domain_crash(d);
>> >> > +
>> >> > +rcu_unlock_domain(d);
>> >> > +}
>> >>
>> >> So in an earlier patch in this series you (supposedly) moved similar
>> >> logic up to the vendor independent layer. I think this then would
>> >> better get moved up too, if at all possible.
>> >>
>> >
>> > To be honest, I have not much reason for leaving domain crash here and
>> > I was aware of this problem, but crash_domain() here is not harmful
>> > (as the 'd->is_shutting_down' is Set when to crash, and once the 'd-
>> >is_shutting_down'
>> > is Set then return  in domain_shutdown()  ).
>> > In case crash domain directly, it may help us narrow down the 'window'
>> > (the domain is still running)..
>> >
>> > To me, moving the logic up is acceptable.
>> >
>> > In next version, could I only drop:
>> >
>> > +if ( !is_hardware_domain(d) )
>> > +domain_crash(d);
>> >
>> > In this patch, and leave the rest as is ?
>> 
>> Not really - the entire function looks like it could move out of vtd/, as I 
>> can't
>> see anything VT-d specific in it.
>> 
> 
> Yes, it could be out of vtd, and then benefit arm/amd  IOMMU to hide ATS 
> device.
> 
> But 'did'  and 'iommu->domid_bitmap' are really vtd specific. Both of them 
> are 
> to get domain* structure, not a big deal, and then I can use domain_id 
> instead.
> 
> IMO, the domain* structure is a must here,

I agree, I did overlook that domain lookup. The common code
function should be passed a struct domain *, as you suggest.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue

2016-06-28 Thread Xu, Quan
On June 27, 2016 11:21 PM, Jan Beulich  wrote:
> >>> On 27.06.16 at 14:56,  wrote:
> > On June 27, 2016 4:24 PM, Jan Beulich  wrote:
> >> >>> On 24.06.16 at 07:51,  wrote:
> >> > @@ -199,24 +199,73 @@ static int __must_check
> >> queue_invalidate_wait(struct iommu *iommu,
> >> >  return -EOPNOTSUPP;
> >> >  }
> >> >
> >> > -static int __must_check invalidate_sync(struct iommu *iommu,
> >> > -bool_t flush_dev_iotlb)
> >> > +static int __must_check invalidate_sync(struct iommu *iommu)
> >> >  {
> >> >  struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
> >> >
> >> >  ASSERT(qi_ctrl->qinval_maddr);
> >> >
> >> > -return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb);
> >> > +return queue_invalidate_wait(iommu, 0, 1, 1, 0); }
> >> > +
> >> > +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did,
> >> > + struct pci_dev *pdev) {
> >> > +struct domain *d = NULL;
> >> > +
> >> > +if ( test_bit(did, iommu->domid_bitmap) )
> >> > +d = rcu_lock_domain_by_id(iommu->domid_map[did]);
> >> > +
> >> > +/*
> >> > + * In case the domain has been freed or the IOMMU domid bitmap is
> >> > + * not valid, the device no longer belongs to this domain.
> >> > + */
> >> > +if ( d == NULL )
> >> > +return;
> >> > +
> >> > +pcidevs_lock();
> >> > +ASSERT(pdev->domain);
> >> > +list_del(&pdev->domain_list);
> >> > +pdev->domain = NULL;
> >> > +pci_hide_existing_device(pdev);
> >> > +pcidevs_unlock();
> >> > +
> >> > +if ( !d->is_shutting_down && printk_ratelimit() )
> >> > +printk(XENLOG_WARNING VTDPREFIX
> >> > +   " dom%d: ATS device %04x:%02x:%02x.%u flush failed\n",
> >> > +   d->domain_id, pdev->seg, pdev->bus,
> >> > +   PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
> >> > +
> >> > +if ( !is_hardware_domain(d) )
> >> > +domain_crash(d);
> >> > +
> >> > +rcu_unlock_domain(d);
> >> > +}
> >>
> >> So in an earlier patch in this series you (supposedly) moved similar
> >> logic up to the vendor independent layer. I think this then would
> >> better get moved up too, if at all possible.
> >>
> >
> > To be honest, I have not much reason for leaving domain crash here and
> > I was aware of this problem, but crash_domain() here is not harmful
> > (as the 'd->is_shutting_down' is Set when to crash, and once the 'd-
> >is_shutting_down'
> > is Set then return  in domain_shutdown()  ).
> > In case crash domain directly, it may help us narrow down the 'window'
> > (the domain is still running)..
> >
> > To me, moving the logic up is acceptable.
> >
> > In next version, could I only drop:
> >
> > +if ( !is_hardware_domain(d) )
> > +domain_crash(d);
> >
> > In this patch, and leave the rest as is ?
> 
> Not really - the entire function looks like it could move out of vtd/, as I 
> can't
> see anything VT-d specific in it.
> 

Yes, it could be out of vtd, and then benefit arm/amd  IOMMU to hide ATS device.

But 'did'  and 'iommu->domid_bitmap' are really vtd specific. Both of them are 
to get domain* structure, not a big deal, and then I can use domain_id instead.

IMO, the domain* structure is a must here,
As mentioned, not all of call trees of device iotlb flush are under 
pcidevs_lock, (.i.e  ...--iommu_iotlb_flush()-- xenmem_add_to_physmap()... )
In extreme cases, the domain may has been freed or the device may has been 
detached or even attached to another domain ( I also need to add 'if 
(pdev->domain == d )' before to hide device).
the domain* structure can help us check above cases.

Quan

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue

2016-06-27 Thread Jan Beulich
>>> On 27.06.16 at 14:56,  wrote:
> On June 27, 2016 4:24 PM, Jan Beulich  wrote:
>> >>> On 24.06.16 at 07:51,  wrote:
>> > @@ -199,24 +199,73 @@ static int __must_check
>> queue_invalidate_wait(struct iommu *iommu,
>> >  return -EOPNOTSUPP;
>> >  }
>> >
>> > -static int __must_check invalidate_sync(struct iommu *iommu,
>> > -bool_t flush_dev_iotlb)
>> > +static int __must_check invalidate_sync(struct iommu *iommu)
>> >  {
>> >  struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
>> >
>> >  ASSERT(qi_ctrl->qinval_maddr);
>> >
>> > -return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb);
>> > +return queue_invalidate_wait(iommu, 0, 1, 1, 0); }
>> > +
>> > +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did,
>> > + struct pci_dev *pdev) {
>> > +struct domain *d = NULL;
>> > +
>> > +if ( test_bit(did, iommu->domid_bitmap) )
>> > +d = rcu_lock_domain_by_id(iommu->domid_map[did]);
>> > +
>> > +/*
>> > + * In case the domain has been freed or the IOMMU domid bitmap is
>> > + * not valid, the device no longer belongs to this domain.
>> > + */
>> > +if ( d == NULL )
>> > +return;
>> > +
>> > +pcidevs_lock();
>> > +ASSERT(pdev->domain);
>> > +list_del(&pdev->domain_list);
>> > +pdev->domain = NULL;
>> > +pci_hide_existing_device(pdev);
>> > +pcidevs_unlock();
>> > +
>> > +if ( !d->is_shutting_down && printk_ratelimit() )
>> > +printk(XENLOG_WARNING VTDPREFIX
>> > +   " dom%d: ATS device %04x:%02x:%02x.%u flush failed\n",
>> > +   d->domain_id, pdev->seg, pdev->bus,
>> > +   PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
>> > +
>> > +if ( !is_hardware_domain(d) )
>> > +domain_crash(d);
>> > +
>> > +rcu_unlock_domain(d);
>> > +}
>> 
>> So in an earlier patch in this series you (supposedly) moved similar logic 
>> up to
>> the vendor independent layer. I think this then would better get moved up
>> too, if at all possible.
>> 
> 
> To be honest, I have not much reason for leaving domain crash here and I was 
> aware of this problem, but crash_domain() here is not harmful (as the 
> 'd->is_shutting_down' is Set when to crash, and once the 
> 'd->is_shutting_down' 
> is Set then return  in domain_shutdown()  ).
> In case crash domain directly, it may help us narrow down the 'window' (the 
> domain is still running)..
> 
> To me, moving the logic up is acceptable.
> 
> In next version, could I only drop:
> 
> +if ( !is_hardware_domain(d) )
> +domain_crash(d);
> 
> In this patch, and leave the rest as is ?

Not really - the entire function looks like it could move out of vtd/,
as I can't see anything VT-d specific in it.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue

2016-06-27 Thread Xu, Quan
On June 27, 2016 4:24 PM, Jan Beulich  wrote:
> >>> On 24.06.16 at 07:51,  wrote:
> > @@ -199,24 +199,73 @@ static int __must_check
> queue_invalidate_wait(struct iommu *iommu,
> >  return -EOPNOTSUPP;
> >  }
> >
> > -static int __must_check invalidate_sync(struct iommu *iommu,
> > -bool_t flush_dev_iotlb)
> > +static int __must_check invalidate_sync(struct iommu *iommu)
> >  {
> >  struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
> >
> >  ASSERT(qi_ctrl->qinval_maddr);
> >
> > -return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb);
> > +return queue_invalidate_wait(iommu, 0, 1, 1, 0); }
> > +
> > +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did,
> > + struct pci_dev *pdev) {
> > +struct domain *d = NULL;
> > +
> > +if ( test_bit(did, iommu->domid_bitmap) )
> > +d = rcu_lock_domain_by_id(iommu->domid_map[did]);
> > +
> > +/*
> > + * In case the domain has been freed or the IOMMU domid bitmap is
> > + * not valid, the device no longer belongs to this domain.
> > + */
> > +if ( d == NULL )
> > +return;
> > +
> > +pcidevs_lock();
> > +ASSERT(pdev->domain);
> > +list_del(&pdev->domain_list);
> > +pdev->domain = NULL;
> > +pci_hide_existing_device(pdev);
> > +pcidevs_unlock();
> > +
> > +if ( !d->is_shutting_down && printk_ratelimit() )
> > +printk(XENLOG_WARNING VTDPREFIX
> > +   " dom%d: ATS device %04x:%02x:%02x.%u flush failed\n",
> > +   d->domain_id, pdev->seg, pdev->bus,
> > +   PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
> > +
> > +if ( !is_hardware_domain(d) )
> > +domain_crash(d);
> > +
> > +rcu_unlock_domain(d);
> > +}
> 
> So in an earlier patch in this series you (supposedly) moved similar logic up 
> to
> the vendor independent layer. I think this then would better get moved up
> too, if at all possible.
> 

To be honest, I have not much reason for leaving domain crash here and I was 
aware of this problem, but crash_domain() here is not harmful (as the 
'd->is_shutting_down' is Set when to crash, and once the 'd->is_shutting_down' 
is Set then return  in domain_shutdown()  ).
In case crash domain directly, it may help us narrow down the 'window' (the 
domain is still running)..

To me, moving the logic up is acceptable.

In next version, could I only drop:

+if ( !is_hardware_domain(d) )
+domain_crash(d);

In this patch, and leave the rest as is ?

Quan



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue

2016-06-27 Thread Jan Beulich
>>> On 24.06.16 at 07:51,  wrote:
> @@ -199,24 +199,73 @@ static int __must_check queue_invalidate_wait(struct 
> iommu *iommu,
>  return -EOPNOTSUPP;
>  }
>  
> -static int __must_check invalidate_sync(struct iommu *iommu,
> -bool_t flush_dev_iotlb)
> +static int __must_check invalidate_sync(struct iommu *iommu)
>  {
>  struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
>  
>  ASSERT(qi_ctrl->qinval_maddr);
>  
> -return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb);
> +return queue_invalidate_wait(iommu, 0, 1, 1, 0);
> +}
> +
> +static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did,
> + struct pci_dev *pdev)
> +{
> +struct domain *d = NULL;
> +
> +if ( test_bit(did, iommu->domid_bitmap) )
> +d = rcu_lock_domain_by_id(iommu->domid_map[did]);
> +
> +/*
> + * In case the domain has been freed or the IOMMU domid bitmap is
> + * not valid, the device no longer belongs to this domain.
> + */
> +if ( d == NULL )
> +return;
> +
> +pcidevs_lock();
> +ASSERT(pdev->domain);
> +list_del(&pdev->domain_list);
> +pdev->domain = NULL;
> +pci_hide_existing_device(pdev);
> +pcidevs_unlock();
> +
> +if ( !d->is_shutting_down && printk_ratelimit() )
> +printk(XENLOG_WARNING VTDPREFIX
> +   " dom%d: ATS device %04x:%02x:%02x.%u flush failed\n",
> +   d->domain_id, pdev->seg, pdev->bus,
> +   PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
> +
> +if ( !is_hardware_domain(d) )
> +domain_crash(d);
> +
> +rcu_unlock_domain(d);
> +}

So in an earlier patch in this series you (supposedly) moved similar
logic up to the vendor independent layer. I think this then would
better get moved up too, if at all possible.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue

2016-06-27 Thread Jan Beulich
>>> On 26.06.16 at 11:18,  wrote:
> On June 24, 2016 7:55 PM, Tian, Kevin  wrote:
>> > From: Xu, Quan
>> > Sent: Friday, June 24, 2016 1:52 PM
>> > diff --git a/xen/drivers/passthrough/vtd/extern.h
>> > b/xen/drivers/passthrough/vtd/extern.h
>> > index 45357f2..efaff28 100644
>> > --- a/xen/drivers/passthrough/vtd/extern.h
>> > +++ b/xen/drivers/passthrough/vtd/extern.h
>> > @@ -25,6 +25,7 @@
>> >
>> >  #define VTDPREFIX "[VT-D]"
>> >
>> > +struct pci_ats_dev;
>> >  extern bool_t rwbf_quirk;
>> >
>> >  void print_iommu_regs(struct acpi_drhd_unit *drhd); @@ -60,8 +61,8 @@
>> > int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
>> >   u64 addr, unsigned int size_order, u64
>> > type);
>> >
>> >  int __must_check qinval_device_iotlb_sync(struct iommu *iommu,
>> > -  u32 max_invs_pend,
>> > -  u16 sid, u16 size, u64 addr);
>> > +  struct pci_ats_dev *ats_dev,
>> > +  u16 did, u16 size, u64
>> > + addr);
>> >
>> >  unsigned int get_cache_line_size(void);  void cacheline_flush(char
>> > *); diff --git a/xen/drivers/passthrough/vtd/qinval.c
>> > b/xen/drivers/passthrough/vtd/qinval.c
>> > index 4492b29..e4e2771 100644
>> > --- a/xen/drivers/passthrough/vtd/qinval.c
>> > +++ b/xen/drivers/passthrough/vtd/qinval.c
>> > @@ -27,11 +27,11 @@
>> >  #include "dmar.h"
>> >  #include "vtd.h"
>> >  #include "extern.h"
>> > +#include "../ats.h"
>> 
>> Earlier you said:
>> >1. a forward declaration struct pci_ats_dev*, instead of
>> >   including ats.h.
>>
> 
> This context is 'in extern.h', but..
> 
>> But above you still have ats.h included.
>> 
> 
> .. I really need to include 'ats.h' here, as the 'struct pci_ats_dev*' is 
> used in this file.

Used as in "a variable of this type, or a pointer to this type
de-referenced", or only as in "a variable/parameter of pointer
to this type declared"? In the latter case you don't need the
include.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue

2016-06-26 Thread Xu, Quan
On June 24, 2016 7:55 PM, Tian, Kevin  wrote:
> > From: Xu, Quan
> > Sent: Friday, June 24, 2016 1:52 PM
> > diff --git a/xen/drivers/passthrough/vtd/extern.h
> > b/xen/drivers/passthrough/vtd/extern.h
> > index 45357f2..efaff28 100644
> > --- a/xen/drivers/passthrough/vtd/extern.h
> > +++ b/xen/drivers/passthrough/vtd/extern.h
> > @@ -25,6 +25,7 @@
> >
> >  #define VTDPREFIX "[VT-D]"
> >
> > +struct pci_ats_dev;
> >  extern bool_t rwbf_quirk;
> >
> >  void print_iommu_regs(struct acpi_drhd_unit *drhd); @@ -60,8 +61,8 @@
> > int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
> >   u64 addr, unsigned int size_order, u64
> > type);
> >
> >  int __must_check qinval_device_iotlb_sync(struct iommu *iommu,
> > -  u32 max_invs_pend,
> > -  u16 sid, u16 size, u64 addr);
> > +  struct pci_ats_dev *ats_dev,
> > +  u16 did, u16 size, u64
> > + addr);
> >
> >  unsigned int get_cache_line_size(void);  void cacheline_flush(char
> > *); diff --git a/xen/drivers/passthrough/vtd/qinval.c
> > b/xen/drivers/passthrough/vtd/qinval.c
> > index 4492b29..e4e2771 100644
> > --- a/xen/drivers/passthrough/vtd/qinval.c
> > +++ b/xen/drivers/passthrough/vtd/qinval.c
> > @@ -27,11 +27,11 @@
> >  #include "dmar.h"
> >  #include "vtd.h"
> >  #include "extern.h"
> > +#include "../ats.h"
> 
> Earlier you said:
> >1. a forward declaration struct pci_ats_dev*, instead of
> >   including ats.h.
>

This context is 'in extern.h', but..

> But above you still have ats.h included.
> 

.. I really need to include 'ats.h' here, as the 'struct pci_ats_dev*' is used 
in this file.

> >
> >  #define VTD_QI_TIMEOUT 1
> >
> > -static int __must_check invalidate_sync(struct iommu *iommu,
> > -bool_t flush_dev_iotlb);
> > +static int __must_check invalidate_sync(struct iommu *iommu);
> 
> I don't understand the rationale behind. In earlier patch you introduce a new
> parameter which is however just removed later here
> 
In earlier patch, refactor invalidate_sync() to indicate whether we need to 
flush device IOTLB or not.
change it back here, as I add a specific function - dev_invalidate_sync() for 
device IOTLB invalidation..

Quan





___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue

2016-06-24 Thread Jan Beulich
>>> On 24.06.16 at 13:55,  wrote:
>> --- a/xen/drivers/passthrough/pci.c
>> +++ b/xen/drivers/passthrough/pci.c
>> @@ -419,7 +419,7 @@ static void free_pdev(struct pci_seg *pseg, struct 
>> pci_dev *pdev)
>>  xfree(pdev);
>>  }
>> 
>> -static void _pci_hide_device(struct pci_dev *pdev)
>> +void pci_hide_existing_device(struct pci_dev *pdev)
> 
> Don't understand what is the meaning of "hiding existing device".
> Is there case where you may want to hide a non-existing device?

The distinction is whether alloc_pdev() needs calling, as can be seen ...

>> @@ -436,7 +436,7 @@ int __init pci_hide_device(int bus, int devfn)
>>  pdev = alloc_pdev(get_pseg(0), bus, devfn);
>>  if ( pdev )
>>  {
>> -_pci_hide_device(pdev);
>> +pci_hide_existing_device(pdev);
>>  rc = 0;
>>  }

... as the beginning context of this hunk.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue

2016-06-24 Thread Tian, Kevin
> From: Xu, Quan
> Sent: Friday, June 24, 2016 1:52 PM
> 
> From: Quan Xu 
> 
> If Device-TLB flush timed out, we hide the target ATS device
> immediately and crash the domain owning this ATS device. If
> impacted domain is hardware domain, just throw out a warning.
> 
> By hiding the device, we make sure it can't be assigned to any
> domain any longer (see device_assigned).
> 
> Signed-off-by: Quan Xu 
> 
> CC: Jan Beulich 
> CC: Kevin Tian 
> CC: Feng Wu 
> 
> ---
> v12:
>1. a forward declaration struct pci_ats_dev*, instead of
>   including ats.h.
>2. eliminate the loop.
>3. use the same logic for logging and crashing as I did in
>   other series (despite I have moved the domain crash logic
>   up to the generic IOMMU layer, I think I am better still
>   leave it as is).
>4. enhance dev_invalidate_sync() with ASSERT().
> ---
>  xen/drivers/passthrough/pci.c |  6 +--
>  xen/drivers/passthrough/vtd/extern.h  |  5 ++-
>  xen/drivers/passthrough/vtd/qinval.c  | 75
> +--
>  xen/drivers/passthrough/vtd/x86/ats.c |  9 +
>  xen/include/xen/pci.h |  1 +
>  5 files changed, 71 insertions(+), 25 deletions(-)
> 
> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
> index 98936f55c..843dc88 100644
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -419,7 +419,7 @@ static void free_pdev(struct pci_seg *pseg, struct 
> pci_dev *pdev)
>  xfree(pdev);
>  }
> 
> -static void _pci_hide_device(struct pci_dev *pdev)
> +void pci_hide_existing_device(struct pci_dev *pdev)

Don't understand what is the meaning of "hiding existing device".
Is there case where you may want to hide a non-existing device?

>  {
>  if ( pdev->domain )
>  return;
> @@ -436,7 +436,7 @@ int __init pci_hide_device(int bus, int devfn)
>  pdev = alloc_pdev(get_pseg(0), bus, devfn);
>  if ( pdev )
>  {
> -_pci_hide_device(pdev);
> +pci_hide_existing_device(pdev);
>  rc = 0;
>  }
>  pcidevs_unlock();
> @@ -466,7 +466,7 @@ int __init pci_ro_device(int seg, int bus, int devfn)
>  }
> 
>  __set_bit(PCI_BDF2(bus, devfn), pseg->ro_map);
> -_pci_hide_device(pdev);
> +pci_hide_existing_device(pdev);
> 
>  return 0;
>  }
> diff --git a/xen/drivers/passthrough/vtd/extern.h
> b/xen/drivers/passthrough/vtd/extern.h
> index 45357f2..efaff28 100644
> --- a/xen/drivers/passthrough/vtd/extern.h
> +++ b/xen/drivers/passthrough/vtd/extern.h
> @@ -25,6 +25,7 @@
> 
>  #define VTDPREFIX "[VT-D]"
> 
> +struct pci_ats_dev;
>  extern bool_t rwbf_quirk;
> 
>  void print_iommu_regs(struct acpi_drhd_unit *drhd);
> @@ -60,8 +61,8 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
>   u64 addr, unsigned int size_order, u64 type);
> 
>  int __must_check qinval_device_iotlb_sync(struct iommu *iommu,
> -  u32 max_invs_pend,
> -  u16 sid, u16 size, u64 addr);
> +  struct pci_ats_dev *ats_dev,
> +  u16 did, u16 size, u64 addr);
> 
>  unsigned int get_cache_line_size(void);
>  void cacheline_flush(char *);
> diff --git a/xen/drivers/passthrough/vtd/qinval.c 
> b/xen/drivers/passthrough/vtd/qinval.c
> index 4492b29..e4e2771 100644
> --- a/xen/drivers/passthrough/vtd/qinval.c
> +++ b/xen/drivers/passthrough/vtd/qinval.c
> @@ -27,11 +27,11 @@
>  #include "dmar.h"
>  #include "vtd.h"
>  #include "extern.h"
> +#include "../ats.h"

Earlier you said:
>1. a forward declaration struct pci_ats_dev*, instead of
>   including ats.h.

But above you still have ats.h included.

> 
>  #define VTD_QI_TIMEOUT   1
> 
> -static int __must_check invalidate_sync(struct iommu *iommu,
> -bool_t flush_dev_iotlb);
> +static int __must_check invalidate_sync(struct iommu *iommu);

I don't understand the rationale behind. In earlier patch you introduce
a new parameter which is however just removed later here

Thanks
Kevin


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH v12 6/6] vt-d: fix vt-d Device-TLB flush timeout issue

2016-06-23 Thread Xu, Quan
From: Quan Xu 

If Device-TLB flush timed out, we hide the target ATS device
immediately and crash the domain owning this ATS device. If
impacted domain is hardware domain, just throw out a warning.

By hiding the device, we make sure it can't be assigned to any
domain any longer (see device_assigned).

Signed-off-by: Quan Xu 

CC: Jan Beulich 
CC: Kevin Tian 
CC: Feng Wu 

---
v12:
   1. a forward declaration struct pci_ats_dev*, instead of
  including ats.h.
   2. eliminate the loop.
   3. use the same logic for logging and crashing as I did in
  other series (despite I have moved the domain crash logic
  up to the generic IOMMU layer, I think I am better still
  leave it as is).
   4. enhance dev_invalidate_sync() with ASSERT().
---
 xen/drivers/passthrough/pci.c |  6 +--
 xen/drivers/passthrough/vtd/extern.h  |  5 ++-
 xen/drivers/passthrough/vtd/qinval.c  | 75 +--
 xen/drivers/passthrough/vtd/x86/ats.c |  9 +
 xen/include/xen/pci.h |  1 +
 5 files changed, 71 insertions(+), 25 deletions(-)

diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 98936f55c..843dc88 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -419,7 +419,7 @@ static void free_pdev(struct pci_seg *pseg, struct pci_dev 
*pdev)
 xfree(pdev);
 }
 
-static void _pci_hide_device(struct pci_dev *pdev)
+void pci_hide_existing_device(struct pci_dev *pdev)
 {
 if ( pdev->domain )
 return;
@@ -436,7 +436,7 @@ int __init pci_hide_device(int bus, int devfn)
 pdev = alloc_pdev(get_pseg(0), bus, devfn);
 if ( pdev )
 {
-_pci_hide_device(pdev);
+pci_hide_existing_device(pdev);
 rc = 0;
 }
 pcidevs_unlock();
@@ -466,7 +466,7 @@ int __init pci_ro_device(int seg, int bus, int devfn)
 }
 
 __set_bit(PCI_BDF2(bus, devfn), pseg->ro_map);
-_pci_hide_device(pdev);
+pci_hide_existing_device(pdev);
 
 return 0;
 }
diff --git a/xen/drivers/passthrough/vtd/extern.h 
b/xen/drivers/passthrough/vtd/extern.h
index 45357f2..efaff28 100644
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -25,6 +25,7 @@
 
 #define VTDPREFIX "[VT-D]"
 
+struct pci_ats_dev;
 extern bool_t rwbf_quirk;
 
 void print_iommu_regs(struct acpi_drhd_unit *drhd);
@@ -60,8 +61,8 @@ int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
  u64 addr, unsigned int size_order, u64 type);
 
 int __must_check qinval_device_iotlb_sync(struct iommu *iommu,
-  u32 max_invs_pend,
-  u16 sid, u16 size, u64 addr);
+  struct pci_ats_dev *ats_dev,
+  u16 did, u16 size, u64 addr);
 
 unsigned int get_cache_line_size(void);
 void cacheline_flush(char *);
diff --git a/xen/drivers/passthrough/vtd/qinval.c 
b/xen/drivers/passthrough/vtd/qinval.c
index 4492b29..e4e2771 100644
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -27,11 +27,11 @@
 #include "dmar.h"
 #include "vtd.h"
 #include "extern.h"
+#include "../ats.h"
 
 #define VTD_QI_TIMEOUT 1
 
-static int __must_check invalidate_sync(struct iommu *iommu,
-bool_t flush_dev_iotlb);
+static int __must_check invalidate_sync(struct iommu *iommu);
 
 static void print_qi_regs(struct iommu *iommu)
 {
@@ -103,7 +103,7 @@ static int __must_check 
queue_invalidate_context_sync(struct iommu *iommu,
 
 unmap_vtd_domain_page(qinval_entries);
 
-return invalidate_sync(iommu, 0);
+return invalidate_sync(iommu);
 }
 
 static int __must_check queue_invalidate_iotlb_sync(struct iommu *iommu,
@@ -140,7 +140,7 @@ static int __must_check queue_invalidate_iotlb_sync(struct 
iommu *iommu,
 qinval_update_qtail(iommu, index);
 spin_unlock_irqrestore(&iommu->register_lock, flags);
 
-return invalidate_sync(iommu, 0);
+return invalidate_sync(iommu);
 }
 
 static int __must_check queue_invalidate_wait(struct iommu *iommu,
@@ -199,24 +199,73 @@ static int __must_check queue_invalidate_wait(struct 
iommu *iommu,
 return -EOPNOTSUPP;
 }
 
-static int __must_check invalidate_sync(struct iommu *iommu,
-bool_t flush_dev_iotlb)
+static int __must_check invalidate_sync(struct iommu *iommu)
 {
 struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
 
 ASSERT(qi_ctrl->qinval_maddr);
 
-return queue_invalidate_wait(iommu, 0, 1, 1, flush_dev_iotlb);
+return queue_invalidate_wait(iommu, 0, 1, 1, 0);
+}
+
+static void dev_invalidate_iotlb_timeout(struct iommu *iommu, u16 did,
+ struct pci_dev *pdev)
+{
+struct domain *d = NULL;
+
+if ( test_bit(did, iommu->domid_bitmap) )
+d = rcu_lock_domain_by_id(iommu->domid_map[did]);
+
+/*
+ * In case the domain h