Re: [RFC v1 6/8] mshv: command line option to skip devices in PV-IOMMU

2021-08-10 Thread Wei Liu
On Wed, Aug 04, 2021 at 12:33:54PM +0530, Praveen Kumar wrote:
> On 04-08-2021 03:26, Wei Liu wrote:
> >>>   struct iommu_domain domain;
> >>> @@ -774,6 +784,41 @@ static struct iommu_device 
> >>> *hv_iommu_probe_device(struct device *dev)
> >>>   if (!dev_is_pci(dev))
> >>>   return ERR_PTR(-ENODEV);
> >>>  
> >>> + /*
> >>> +  * Skip the PCI device specified in `pci_devs_to_skip`. This is a
> >>> +  * temporary solution until we figure out a way to extract information
> >>> +  * from the hypervisor what devices it is already using.
> >>> +  */
> >>> + if (pci_devs_to_skip && *pci_devs_to_skip) {
> >>> + int pos = 0;
> >>> + int parsed;
> >>> + int segment, bus, slot, func;
> >>> + struct pci_dev *pdev = to_pci_dev(dev);
> >>> +
> >>> + do {
> >>> + parsed = 0;
> >>> +
> >>> + sscanf(pci_devs_to_skip + pos,
> >>> + " (%x:%x:%x.%x) %n",
> >>> + &segment, &bus, &slot, &func, &parsed);
> >>> +
> >>> + if (parsed <= 0)
> >>> + break;
> >>> +
> >>> + if (pci_domain_nr(pdev->bus) == segment &&
> >>> + pdev->bus->number == bus &&
> >>> + PCI_SLOT(pdev->devfn) == slot &&
> >>> + PCI_FUNC(pdev->devfn) == func)
> >>> + {
> >>> + dev_info(dev, "skipped by MSHV IOMMU\n");
> >>> + return ERR_PTR(-ENODEV);
> >>> + }
> >>> +
> >>> + pos += parsed;
> >>> +
> >>> + } while (pci_devs_to_skip[pos]);
> >>
> >> Is there a possibility of pci_devs_to_skip + pos > sizeof(pci_devs_to_skip)
> >> and also a valid memory ?
> > 
> > pos should point to the last parsed position. If parsing fails pos does
> > not get updated and the code breaks out of the loop. If parsing is
> > success pos should point to either the start of next element of '\0'
> > (end of string). To me this is good enough.
> 
> The point is, hypothetically the address to pci_devs_to_skip + pos can
> be valid address (later to '\0'), and thus there is a possibility,
> that parsing may not fail.

Have you found an example how at any given point in time
pci_devs_to_skip + pos can point outside of user provided string?

> Another, there is also a possibility of sscanf faulting accessing the
> illegal address, if pci_devs_to_skip[pos] turns out to be not NULL or
> valid address.

That depends on pci_devs_to_skip + pos can point to an invalid address
in the first place, so that goes back to the question above.

> 
> > 
> >> I would recommend to have a check of size as well before accessing the
> >> array content, just to be safer accessing any memory.
> >>
> > 
> > What check do you have in mind?
> 
> Something like,
> size_t len = strlen(pci_devs_to_skip);
> do {
> 
>   len -= parsed;
> } while (len);
> 
> OR
> 
> do {
> ...
>   pos += parsed;
> } while (pos < len);
> 
> Further, I'm also fine with the existing code, if you think this won't
> break and already been taken care. Thanks.

But in the loop somewhere you will still need to parse pci_devs_to_skip
+ some_offset. The new code structure does not remove that, right?

Given this is for debugging and is supposed to be temporary, I think the
code is good enough. But I want to make sure if there is anything I
missed.

Wei.

> 
> Regards,
> 
> ~Praveen.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC v1 6/8] mshv: command line option to skip devices in PV-IOMMU

2021-08-04 Thread Praveen Kumar
On 04-08-2021 03:26, Wei Liu wrote:
>>> struct iommu_domain domain;
>>> @@ -774,6 +784,41 @@ static struct iommu_device 
>>> *hv_iommu_probe_device(struct device *dev)
>>> if (!dev_is_pci(dev))
>>> return ERR_PTR(-ENODEV);
>>>  
>>> +   /*
>>> +* Skip the PCI device specified in `pci_devs_to_skip`. This is a
>>> +* temporary solution until we figure out a way to extract information
>>> +* from the hypervisor what devices it is already using.
>>> +*/
>>> +   if (pci_devs_to_skip && *pci_devs_to_skip) {
>>> +   int pos = 0;
>>> +   int parsed;
>>> +   int segment, bus, slot, func;
>>> +   struct pci_dev *pdev = to_pci_dev(dev);
>>> +
>>> +   do {
>>> +   parsed = 0;
>>> +
>>> +   sscanf(pci_devs_to_skip + pos,
>>> +   " (%x:%x:%x.%x) %n",
>>> +   &segment, &bus, &slot, &func, &parsed);
>>> +
>>> +   if (parsed <= 0)
>>> +   break;
>>> +
>>> +   if (pci_domain_nr(pdev->bus) == segment &&
>>> +   pdev->bus->number == bus &&
>>> +   PCI_SLOT(pdev->devfn) == slot &&
>>> +   PCI_FUNC(pdev->devfn) == func)
>>> +   {
>>> +   dev_info(dev, "skipped by MSHV IOMMU\n");
>>> +   return ERR_PTR(-ENODEV);
>>> +   }
>>> +
>>> +   pos += parsed;
>>> +
>>> +   } while (pci_devs_to_skip[pos]);
>>
>> Is there a possibility of pci_devs_to_skip + pos > sizeof(pci_devs_to_skip)
>> and also a valid memory ?
> 
> pos should point to the last parsed position. If parsing fails pos does
> not get updated and the code breaks out of the loop. If parsing is
> success pos should point to either the start of next element of '\0'
> (end of string). To me this is good enough.

The point is, hypothetically the address to pci_devs_to_skip + pos can be valid 
address (later to '\0'), and thus there is a possibility, that parsing may not 
fail.
Another, there is also a possibility of sscanf faulting accessing the illegal 
address, if pci_devs_to_skip[pos] turns out to be not NULL or valid address.

> 
>> I would recommend to have a check of size as well before accessing the
>> array content, just to be safer accessing any memory.
>>
> 
> What check do you have in mind?

Something like,
size_t len = strlen(pci_devs_to_skip);
do {

len -= parsed;
} while (len);

OR

do {
...
pos += parsed;
} while (pos < len);

Further, I'm also fine with the existing code, if you think this won't break 
and already been taken care. Thanks.

Regards,

~Praveen.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC v1 6/8] mshv: command line option to skip devices in PV-IOMMU

2021-08-03 Thread Wei Liu
On Wed, Aug 04, 2021 at 12:20:42AM +0530, Praveen Kumar wrote:
> On 09-07-2021 17:13, Wei Liu wrote:
> > Some devices may have been claimed by the hypervisor already. One such
> > example is a user can assign a NIC for debugging purpose.
> > 
> > Ideally Linux should be able to tell retrieve that information, but
> > there is no way to do that yet. And designing that new mechanism is
> > going to take time.
> > 
> > Provide a command line option for skipping devices. This is a stopgap
> > solution, so it is intentionally undocumented. Hopefully we can retire
> > it in the future.
> > 
> > Signed-off-by: Wei Liu 
> > ---
> >  drivers/iommu/hyperv-iommu.c | 45 
> >  1 file changed, 45 insertions(+)
> > 
> > diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
> > index 043dcff06511..353da5036387 100644
> > --- a/drivers/iommu/hyperv-iommu.c
> > +++ b/drivers/iommu/hyperv-iommu.c
> > @@ -349,6 +349,16 @@ static const struct irq_domain_ops 
> > hyperv_root_ir_domain_ops = {
> >  
> >  #ifdef CONFIG_HYPERV_ROOT_PVIOMMU
> >  
> > +/* The IOMMU will not claim these PCI devices. */
> > +static char *pci_devs_to_skip;
> > +static int __init mshv_iommu_setup_skip(char *str) {
> > +   pci_devs_to_skip = str;
> > +
> > +   return 0;
> > +}
> > +/* mshv_iommu_skip=(:BB:DD.F)(:BB:DD.F) */
> > +__setup("mshv_iommu_skip=", mshv_iommu_setup_skip);
> > +
> >  /* DMA remapping support */
> >  struct hv_iommu_domain {
> > struct iommu_domain domain;
> > @@ -774,6 +784,41 @@ static struct iommu_device 
> > *hv_iommu_probe_device(struct device *dev)
> > if (!dev_is_pci(dev))
> > return ERR_PTR(-ENODEV);
> >  
> > +   /*
> > +* Skip the PCI device specified in `pci_devs_to_skip`. This is a
> > +* temporary solution until we figure out a way to extract information
> > +* from the hypervisor what devices it is already using.
> > +*/
> > +   if (pci_devs_to_skip && *pci_devs_to_skip) {
> > +   int pos = 0;
> > +   int parsed;
> > +   int segment, bus, slot, func;
> > +   struct pci_dev *pdev = to_pci_dev(dev);
> > +
> > +   do {
> > +   parsed = 0;
> > +
> > +   sscanf(pci_devs_to_skip + pos,
> > +   " (%x:%x:%x.%x) %n",
> > +   &segment, &bus, &slot, &func, &parsed);
> > +
> > +   if (parsed <= 0)
> > +   break;
> > +
> > +   if (pci_domain_nr(pdev->bus) == segment &&
> > +   pdev->bus->number == bus &&
> > +   PCI_SLOT(pdev->devfn) == slot &&
> > +   PCI_FUNC(pdev->devfn) == func)
> > +   {
> > +   dev_info(dev, "skipped by MSHV IOMMU\n");
> > +   return ERR_PTR(-ENODEV);
> > +   }
> > +
> > +   pos += parsed;
> > +
> > +   } while (pci_devs_to_skip[pos]);
> 
> Is there a possibility of pci_devs_to_skip + pos > sizeof(pci_devs_to_skip)
> and also a valid memory ?

pos should point to the last parsed position. If parsing fails pos does
not get updated and the code breaks out of the loop. If parsing is
success pos should point to either the start of next element of '\0'
(end of string). To me this is good enough.

> I would recommend to have a check of size as well before accessing the
> array content, just to be safer accessing any memory.
> 

What check do you have in mind?

Wei.

> > +   }
> > +
> > vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
> > if (!vdev)
> > return ERR_PTR(-ENOMEM);
> > 
> 
> Regards,
> 
> ~Praveen.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC v1 6/8] mshv: command line option to skip devices in PV-IOMMU

2021-08-03 Thread Praveen Kumar
On 09-07-2021 17:13, Wei Liu wrote:
> Some devices may have been claimed by the hypervisor already. One such
> example is a user can assign a NIC for debugging purpose.
> 
> Ideally Linux should be able to tell retrieve that information, but
> there is no way to do that yet. And designing that new mechanism is
> going to take time.
> 
> Provide a command line option for skipping devices. This is a stopgap
> solution, so it is intentionally undocumented. Hopefully we can retire
> it in the future.
> 
> Signed-off-by: Wei Liu 
> ---
>  drivers/iommu/hyperv-iommu.c | 45 
>  1 file changed, 45 insertions(+)
> 
> diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
> index 043dcff06511..353da5036387 100644
> --- a/drivers/iommu/hyperv-iommu.c
> +++ b/drivers/iommu/hyperv-iommu.c
> @@ -349,6 +349,16 @@ static const struct irq_domain_ops 
> hyperv_root_ir_domain_ops = {
>  
>  #ifdef CONFIG_HYPERV_ROOT_PVIOMMU
>  
> +/* The IOMMU will not claim these PCI devices. */
> +static char *pci_devs_to_skip;
> +static int __init mshv_iommu_setup_skip(char *str) {
> + pci_devs_to_skip = str;
> +
> + return 0;
> +}
> +/* mshv_iommu_skip=(:BB:DD.F)(:BB:DD.F) */
> +__setup("mshv_iommu_skip=", mshv_iommu_setup_skip);
> +
>  /* DMA remapping support */
>  struct hv_iommu_domain {
>   struct iommu_domain domain;
> @@ -774,6 +784,41 @@ static struct iommu_device *hv_iommu_probe_device(struct 
> device *dev)
>   if (!dev_is_pci(dev))
>   return ERR_PTR(-ENODEV);
>  
> + /*
> +  * Skip the PCI device specified in `pci_devs_to_skip`. This is a
> +  * temporary solution until we figure out a way to extract information
> +  * from the hypervisor what devices it is already using.
> +  */
> + if (pci_devs_to_skip && *pci_devs_to_skip) {
> + int pos = 0;
> + int parsed;
> + int segment, bus, slot, func;
> + struct pci_dev *pdev = to_pci_dev(dev);
> +
> + do {
> + parsed = 0;
> +
> + sscanf(pci_devs_to_skip + pos,
> + " (%x:%x:%x.%x) %n",
> + &segment, &bus, &slot, &func, &parsed);
> +
> + if (parsed <= 0)
> + break;
> +
> + if (pci_domain_nr(pdev->bus) == segment &&
> + pdev->bus->number == bus &&
> + PCI_SLOT(pdev->devfn) == slot &&
> + PCI_FUNC(pdev->devfn) == func)
> + {
> + dev_info(dev, "skipped by MSHV IOMMU\n");
> + return ERR_PTR(-ENODEV);
> + }
> +
> + pos += parsed;
> +
> + } while (pci_devs_to_skip[pos]);

Is there a possibility of pci_devs_to_skip + pos > sizeof(pci_devs_to_skip) and 
also a valid memory ?
I would recommend to have a check of size as well before accessing the array 
content, just to be safer accessing any memory.

> + }
> +
>   vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
>   if (!vdev)
>   return ERR_PTR(-ENOMEM);
> 

Regards,

~Praveen.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC v1 6/8] mshv: command line option to skip devices in PV-IOMMU

2021-07-09 Thread Wei Liu
On Fri, Jul 09, 2021 at 01:46:19PM +0100, Robin Murphy wrote:
> On 2021-07-09 12:43, Wei Liu wrote:
> > Some devices may have been claimed by the hypervisor already. One such
> > example is a user can assign a NIC for debugging purpose.
> > 
> > Ideally Linux should be able to tell retrieve that information, but
> > there is no way to do that yet. And designing that new mechanism is
> > going to take time.
> > 
> > Provide a command line option for skipping devices. This is a stopgap
> > solution, so it is intentionally undocumented. Hopefully we can retire
> > it in the future.
> 
> Huh? If the host is using a device, why the heck is it exposing any
> knowledge of that device to the guest at all, let alone allowing the guest
> to do anything that could affect its operation!?

The host in this setup consists of the hypervisor, the root kernel and a
bunch of user space programs.

Root is not an ordinary guest. It does need to know all the hardware to
manage the platform. Hypervisor does not claim more devices than it
needs to, nor does it try to hide hardware details from the root.

The hypervisor can protect itself just fine. Any attempt to use the
already claimed devices will be blocked or rejected, so are the attempts
to attach them to device domains.

That, however, leads to some interesting interactions between the
hypervisor and Linux kernel.  When kernel initializes IOMMU during boot,
it will try to attach all devices in one go. Any failure there will
cause kernel to detach the already attached devices. That's not fatal to
kernel, and is only a minor annoyance to our current use case, because
the default domain is a passthrough domain anyway. It will become
problematic once we switch the default domain to a DMA domain to further
tighten security during Linux boot.

Wei.

> 
> Robin.
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC v1 6/8] mshv: command line option to skip devices in PV-IOMMU

2021-07-09 Thread Robin Murphy

On 2021-07-09 12:43, Wei Liu wrote:

Some devices may have been claimed by the hypervisor already. One such
example is a user can assign a NIC for debugging purpose.

Ideally Linux should be able to tell retrieve that information, but
there is no way to do that yet. And designing that new mechanism is
going to take time.

Provide a command line option for skipping devices. This is a stopgap
solution, so it is intentionally undocumented. Hopefully we can retire
it in the future.


Huh? If the host is using a device, why the heck is it exposing any 
knowledge of that device to the guest at all, let alone allowing the 
guest to do anything that could affect its operation!?


Robin.


Signed-off-by: Wei Liu 
---
  drivers/iommu/hyperv-iommu.c | 45 
  1 file changed, 45 insertions(+)

diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index 043dcff06511..353da5036387 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -349,6 +349,16 @@ static const struct irq_domain_ops 
hyperv_root_ir_domain_ops = {
  
  #ifdef CONFIG_HYPERV_ROOT_PVIOMMU
  
+/* The IOMMU will not claim these PCI devices. */

+static char *pci_devs_to_skip;
+static int __init mshv_iommu_setup_skip(char *str) {
+   pci_devs_to_skip = str;
+
+   return 0;
+}
+/* mshv_iommu_skip=(:BB:DD.F)(:BB:DD.F) */
+__setup("mshv_iommu_skip=", mshv_iommu_setup_skip);
+
  /* DMA remapping support */
  struct hv_iommu_domain {
struct iommu_domain domain;
@@ -774,6 +784,41 @@ static struct iommu_device *hv_iommu_probe_device(struct 
device *dev)
if (!dev_is_pci(dev))
return ERR_PTR(-ENODEV);
  
+	/*

+* Skip the PCI device specified in `pci_devs_to_skip`. This is a
+* temporary solution until we figure out a way to extract information
+* from the hypervisor what devices it is already using.
+*/
+   if (pci_devs_to_skip && *pci_devs_to_skip) {
+   int pos = 0;
+   int parsed;
+   int segment, bus, slot, func;
+   struct pci_dev *pdev = to_pci_dev(dev);
+
+   do {
+   parsed = 0;
+
+   sscanf(pci_devs_to_skip + pos,
+   " (%x:%x:%x.%x) %n",
+   &segment, &bus, &slot, &func, &parsed);
+
+   if (parsed <= 0)
+   break;
+
+   if (pci_domain_nr(pdev->bus) == segment &&
+   pdev->bus->number == bus &&
+   PCI_SLOT(pdev->devfn) == slot &&
+   PCI_FUNC(pdev->devfn) == func)
+   {
+   dev_info(dev, "skipped by MSHV IOMMU\n");
+   return ERR_PTR(-ENODEV);
+   }
+
+   pos += parsed;
+
+   } while (pci_devs_to_skip[pos]);
+   }
+
vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
if (!vdev)
return ERR_PTR(-ENOMEM);


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[RFC v1 6/8] mshv: command line option to skip devices in PV-IOMMU

2021-07-09 Thread Wei Liu
Some devices may have been claimed by the hypervisor already. One such
example is a user can assign a NIC for debugging purpose.

Ideally Linux should be able to tell retrieve that information, but
there is no way to do that yet. And designing that new mechanism is
going to take time.

Provide a command line option for skipping devices. This is a stopgap
solution, so it is intentionally undocumented. Hopefully we can retire
it in the future.

Signed-off-by: Wei Liu 
---
 drivers/iommu/hyperv-iommu.c | 45 
 1 file changed, 45 insertions(+)

diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index 043dcff06511..353da5036387 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -349,6 +349,16 @@ static const struct irq_domain_ops 
hyperv_root_ir_domain_ops = {
 
 #ifdef CONFIG_HYPERV_ROOT_PVIOMMU
 
+/* The IOMMU will not claim these PCI devices. */
+static char *pci_devs_to_skip;
+static int __init mshv_iommu_setup_skip(char *str) {
+   pci_devs_to_skip = str;
+
+   return 0;
+}
+/* mshv_iommu_skip=(:BB:DD.F)(:BB:DD.F) */
+__setup("mshv_iommu_skip=", mshv_iommu_setup_skip);
+
 /* DMA remapping support */
 struct hv_iommu_domain {
struct iommu_domain domain;
@@ -774,6 +784,41 @@ static struct iommu_device *hv_iommu_probe_device(struct 
device *dev)
if (!dev_is_pci(dev))
return ERR_PTR(-ENODEV);
 
+   /*
+* Skip the PCI device specified in `pci_devs_to_skip`. This is a
+* temporary solution until we figure out a way to extract information
+* from the hypervisor what devices it is already using.
+*/
+   if (pci_devs_to_skip && *pci_devs_to_skip) {
+   int pos = 0;
+   int parsed;
+   int segment, bus, slot, func;
+   struct pci_dev *pdev = to_pci_dev(dev);
+
+   do {
+   parsed = 0;
+
+   sscanf(pci_devs_to_skip + pos,
+   " (%x:%x:%x.%x) %n",
+   &segment, &bus, &slot, &func, &parsed);
+
+   if (parsed <= 0)
+   break;
+
+   if (pci_domain_nr(pdev->bus) == segment &&
+   pdev->bus->number == bus &&
+   PCI_SLOT(pdev->devfn) == slot &&
+   PCI_FUNC(pdev->devfn) == func)
+   {
+   dev_info(dev, "skipped by MSHV IOMMU\n");
+   return ERR_PTR(-ENODEV);
+   }
+
+   pos += parsed;
+
+   } while (pci_devs_to_skip[pos]);
+   }
+
vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
if (!vdev)
return ERR_PTR(-ENOMEM);
-- 
2.30.2

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu