在 2021年01月15日 23:15, Robin Murphy 写道: > On 2021-01-15 14:26, lijiang wrote: >> Hi, Robin >> >> Thank you for the comment. >> >> 在 2021年01月13日 01:29, Robin Murphy 写道: >>> On 2021-01-05 07:52, lijiang wrote: >>>> 在 2021年01月05日 11:55, lijiang 写道: >>>>> Hi, >>>>> >>>>> Also add Joerg to cc list. >>>>> >>>> >>>> Also add more people to cc list, Jerry Snitselaar and Tom Lendacky. >>>> >>>> Thanks. >>>> >>>>> Thanks. >>>>> Lianbo >>>>> 在 2020年12月26日 13:39, Lianbo Jiang 写道: >>>>>> Currently, because domain attach allows to be deferred from iommu >>>>>> driver to device driver, and when iommu initializes, the devices >>>>>> on the bus will be scanned and the default groups will be allocated. >>>>>> >>>>>> Due to the above changes, some devices could be added to the same >>>>>> group as below: >>>>>> >>>>>> [ 3.859417] pci 0000:01:00.0: Adding to iommu group 16 >>>>>> [ 3.864572] pci 0000:01:00.1: Adding to iommu group 16 >>>>>> [ 3.869738] pci 0000:02:00.0: Adding to iommu group 17 >>>>>> [ 3.874892] pci 0000:02:00.1: Adding to iommu group 17 >>>>>> >>>>>> But when attaching these devices, it doesn't allow that a group has >>>>>> more than one device, otherwise it will return an error. This conflicts >>>>>> with the deferred attaching. Unfortunately, it has two devices in the >>>>>> same group for my side, for example: >>>>>> >>>>>> [ 9.627014] iommu_group_device_count(): device name[0]:0000:01:00.0 >>>>>> [ 9.633545] iommu_group_device_count(): device name[1]:0000:01:00.1 >>>>>> ... >>>>>> [ 10.255609] iommu_group_device_count(): device name[0]:0000:02:00.0 >>>>>> [ 10.262144] iommu_group_device_count(): device name[1]:0000:02:00.1 >>>>>> >>>>>> Finally, which caused the failure of tg3 driver when tg3 driver calls >>>>>> the dma_alloc_coherent() to allocate coherent memory in the >>>>>> tg3_test_dma(). >>>>>> >>>>>> [ 9.660310] tg3 0000:01:00.0: DMA engine test failed, aborting >>>>>> [ 9.754085] tg3: probe of 0000:01:00.0 failed with error -12 >>>>>> [ 9.997512] tg3 0000:01:00.1: DMA engine test failed, aborting >>>>>> [ 10.043053] tg3: probe of 0000:01:00.1 failed with error -12 >>>>>> [ 10.288905] tg3 0000:02:00.0: DMA engine test failed, aborting >>>>>> [ 10.334070] tg3: probe of 0000:02:00.0 failed with error -12 >>>>>> [ 10.578303] tg3 0000:02:00.1: DMA engine test failed, aborting >>>>>> [ 10.622629] tg3: probe of 0000:02:00.1 failed with error -12 >>>>>> >>>>>> In addition, the similar situations also occur in other drivers such >>>>>> as the bnxt_en driver. That can be reproduced easily in kdump kernel >>>>>> when SME is active. >>>>>> >>>>>> Add a check for the deferred attach in the iommu_attach_device() and >>>>>> allow to attach the deferred device regardless of how many devices >>>>>> are in a group. >>> >>> Is this iommu_attach_device() call is coming from iommu-dma? (if not, then >>> whoever's calling it probably shouldn't be) >>> >> >> Yes, you are right, the iommu_attach_device call is coming from iommu-dma. >> >>> Assuming so, then probably what should happen is to move the handling >>> currently in iommu_dma_deferred_attach() into the core so that it can call >>> __iommu_attach_device() directly - the intent is just to replay that exact >>> call skipped in iommu_group_add_device(), so the legacy external >>> iommu_attach_device() interface isn't really the right tool for the job >> >> Sounds good. I will check if this can work in various cases. If it's OK, I >> will post again. >> >> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c >> index f0305e6aac1b..5e7da902ac36 100644 >> --- a/drivers/iommu/dma-iommu.c >> +++ b/drivers/iommu/dma-iommu.c >> @@ -23,7 +23,6 @@ >> #include <linux/swiotlb.h> >> #include <linux/scatterlist.h> >> #include <linux/vmalloc.h> >> -#include <linux/crash_dump.h> >> #include <linux/dma-direct.h> >> struct iommu_dma_msi_page { >> @@ -378,21 +377,6 @@ static int iommu_dma_init_domain(struct iommu_domain >> *domain, dma_addr_t base, >> return iova_reserve_iommu_regions(dev, domain); >> } >> -static int iommu_dma_deferred_attach(struct device *dev, >> - struct iommu_domain *domain) >> -{ >> - const struct iommu_ops *ops = domain->ops; >> - >> - if (!is_kdump_kernel()) >> - return 0; >> - >> - if (unlikely(ops->is_attach_deferred && >> - ops->is_attach_deferred(domain, dev))) >> - return iommu_attach_device(domain, dev); >> - >> - return 0; >> -} >> - >> /** >> * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU >> API >> * page flags. >> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c >> index ffeebda8d6de..4fed1567b498 100644 >> --- a/drivers/iommu/iommu.c >> +++ b/drivers/iommu/iommu.c >> @@ -23,6 +23,7 @@ >> #include <linux/property.h> >> #include <linux/fsl/mc.h> >> #include <linux/module.h> >> +#include <linux/crash_dump.h> >> #include <trace/events/iommu.h> >> static struct kset *iommu_group_kset; >> @@ -1952,6 +1953,21 @@ static int __iommu_attach_device(struct iommu_domain >> *domain, >> return ret; >> } >> +int iommu_dma_deferred_attach(struct device *dev, >> + struct iommu_domain *domain) >> +{ >> + const struct iommu_ops *ops = domain->ops; >> + >> + if (!is_kdump_kernel()) >> + return 0; >> + >> + if (unlikely(ops->is_attach_deferred && >> + ops->is_attach_deferred(domain, dev))) >> + return __iommu_attach_device(domain, dev); >> + >> + return 0; >> +} >> + >> int iommu_attach_device(struct iommu_domain *domain, struct device *dev) >> { >> struct iommu_group *group; >> diff --git a/include/linux/iommu.h b/include/linux/iommu.h >> index b3f0e2018c62..8e0ee96ca456 100644 >> --- a/include/linux/iommu.h >> +++ b/include/linux/iommu.h >> @@ -424,6 +424,8 @@ extern struct iommu_group *iommu_group_get_by_id(int id); >> extern void iommu_domain_free(struct iommu_domain *domain); >> extern int iommu_attach_device(struct iommu_domain *domain, >> struct device *dev); >> +extern int iommu_dma_deferred_attach(struct device *dev, >> + struct iommu_domain *domain); >> extern void iommu_detach_device(struct iommu_domain *domain, >> struct device *dev); >> extern int iommu_uapi_cache_invalidate(struct iommu_domain *domain, >> @@ -680,6 +682,12 @@ static inline int iommu_attach_device(struct >> iommu_domain *domain, >> return -ENODEV; >> } >> +static inline int iommu_dma_deferred_attach(struct device *dev, >> + struct iommu_domain *domain) >> +{ >> + return -ENODEV; >> +} >> + >> static inline void iommu_detach_device(struct iommu_domain *domain, >> struct device *dev) >> { > > Yeah, that's more or less what I had in mind (FWIW I don't think we need the > stub definition since this should only ever be called by other > IOMMU-API-dependent code).
Seems yes, I can remove the stub definition. I just checked the driver/iommu/Kconfig, the IOMMU_API always gets selected by default, and the IOMMU_DMA depends on IOMMU_API. > However I'd really like to minimise the fast-path impact to the normal case, > so how about throwing something like this into the mix as well? That's good idea although the implementation of the is_kdump_kernel() is very simple. But I could split them into two patches so that it can be reviewed conveniently: [1] [PATCH 1/2] dma-iommu: use static-key to minimize the impact in the fast-path Let's move out the is_kdump_kernel() check from iommu_dma_deferred_attach() to iommu_dma_init(), and use the static-key in the fast-path to minimize the impact in the normal case. [2] [PATCH 2/2] iommu: use the __iommu_attach_device() directly for deffered attach Let's move the handling currently in iommu_dma_deferred_attach() into the iommu core code so that it can call the __iommu_attach_device() directly instead of the iommu_attach_device(). And change the name of iommu_dma_deferred_attach() to iommu_do_deferred_attach(). >(where iommu_do_deferred_attach() represents what you have above minus the >move of the kdump check) > OK, understood, thanks. Would you mind adding your Signed-off-by to these patches when I post again? If you have no objection, I will post these two changes(patches) later. Thanks. Lianbo > Cheers, > Robin. > > ----->8----- > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c > index 4078358ed66e..638222558248 100644 > --- a/drivers/iommu/dma-iommu.c > +++ b/drivers/iommu/dma-iommu.c > @@ -51,6 +51,8 @@ struct iommu_dma_cookie { > struct iommu_domain *fq_domain; > }; > > +DEFINE_STATIC_KEY_FALSE(deferred_attach); > + > void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, > struct iommu_domain *domain) > { > @@ -378,21 +380,6 @@ static int iommu_dma_init_domain(struct iommu_domain > *domain, dma_addr_t base, > return iova_reserve_iommu_regions(dev, domain); > } > > -static int iommu_dma_deferred_attach(struct device *dev, > - struct iommu_domain *domain) > -{ > - const struct iommu_ops *ops = domain->ops; > - > - if (!is_kdump_kernel()) > - return 0; > - > - if (unlikely(ops->is_attach_deferred && > - ops->is_attach_deferred(domain, dev))) > - return iommu_attach_device(domain, dev); > - > - return 0; > -} > - > /** > * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU > API > * page flags. > @@ -535,7 +522,8 @@ static dma_addr_t __iommu_dma_map(struct device *dev, > phys_addr_t phys, > size_t iova_off = iova_offset(iovad, phys); > dma_addr_t iova; > > - if (unlikely(iommu_dma_deferred_attach(dev, domain))) > + if (static_branch_unlikely(&deferred_attach) && > + iommu_do_deferred_attach(domain, dev)) > return DMA_MAPPING_ERROR; > > size = iova_align(iovad, size + iova_off); > @@ -693,7 +681,8 @@ static void *iommu_dma_alloc_remap(struct device *dev, > size_t size, > > *dma_handle = DMA_MAPPING_ERROR; > > - if (unlikely(iommu_dma_deferred_attach(dev, domain))) > + if (static_branch_unlikely(&deferred_attach) && > + iommu_do_deferred_attach(domain, dev)) > return NULL; > > min_size = alloc_sizes & -alloc_sizes; > @@ -976,7 +965,8 @@ static int iommu_dma_map_sg(struct device *dev, struct > scatterlist *sg, > unsigned long mask = dma_get_seg_boundary(dev); > int i; > > - if (unlikely(iommu_dma_deferred_attach(dev, domain))) > + if (static_branch_unlikely(&deferred_attach) && > + iommu_do_deferred_attach(domain, dev)) > return 0; > > if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) > @@ -1424,6 +1414,9 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc, > > static int iommu_dma_init(void) > { > + if (is_kdump_kernel()) > + static_branch_enable(&deferred_attach); > + > return iova_cache_get(); > } > arch_initcall(iommu_dma_init); >