On 30/09/2019 12:08, Oliver O'Halloran wrote:
> On PowerNV we use the pcibios_sriov_enable() hook to do two things:
> 
> 1. Create a pci_dn structure for each of the VFs, and
> 2. Configure the PHB's internal BARs that map MMIO ranges to PEs
>    so that each VF has it's own PE. Note that the PE also determines
>    the IOMMU table the HW uses for the device.
> 
> Currently we do not set the pe_number field of the pci_dn immediately after
> assigning the PE number for the VF that it represents. Instead, we do that
> in a fixup (see pnv_pci_dma_dev_setup) which is run inside the
> pcibios_add_device() hook which is run prior to adding the device to the
> bus.
> 
> On PowerNV we add the device to it's IOMMU group using a bus notifier and
> in order for this to work the PE number needs to be known when the bus
> notifier is run. This works today since the PE number is set in the fixup
> which runs before adding the device to the bus. However, if we want to move
> the fixup to a later stage this will break.
> 
> We can fix this by setting the pdn->pe_number inside of
> pcibios_sriov_enable(). There's no good to avoid this since we already have
> all the required information at that point, so... do that. Moving this
> earlier does cause two problems:
> 
> 1. We trip the WARN_ON() in the fixup code, and
> 2. The EEH core will clear pdn->pe_number while recovering VFs.
> 
> The only justification for either of these is a comment in eeh_rmv_device()
> suggesting that pdn->pe_number *must* be set to IODA_INVALID_PE in order
> for the VF to be scanned. However, this comment appears to have no basis in
> reality so just delete it.
> 
> Signed-off-by: Oliver O'Halloran <ooh...@gmail.com>


Tested-by: Alexey Kardashevskiy <a...@ozlabs.ru>
Reviewed-by: Alexey Kardashevskiy <a...@ozlabs.ru>



> ---
> Can't get rid of the fixup entirely since we need it to set the
> ioda_pe->pdev back-pointer. I'll look at killing that another time.
> ---
>  arch/powerpc/kernel/eeh_driver.c          |  6 ------
>  arch/powerpc/platforms/powernv/pci-ioda.c | 19 +++++++++++++++----
>  arch/powerpc/platforms/powernv/pci.c      |  4 ----
>  3 files changed, 15 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/eeh_driver.c 
> b/arch/powerpc/kernel/eeh_driver.c
> index d9279d0..7955fba 100644
> --- a/arch/powerpc/kernel/eeh_driver.c
> +++ b/arch/powerpc/kernel/eeh_driver.c
> @@ -541,12 +541,6 @@ static void eeh_rmv_device(struct eeh_dev *edev, void 
> *userdata)
>  
>               pci_iov_remove_virtfn(edev->physfn, pdn->vf_index);
>               edev->pdev = NULL;
> -
> -             /*
> -              * We have to set the VF PE number to invalid one, which is
> -              * required to plug the VF successfully.
> -              */
> -             pdn->pe_number = IODA_INVALID_PE;
>  #endif
>               if (rmv_data)
>                       list_add(&edev->rmv_entry, &rmv_data->removed_vf_list);
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 5e3172d..70508b3 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -1558,6 +1558,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, 
> u16 num_vfs)
>  
>       /* Reserve PE for each VF */
>       for (vf_index = 0; vf_index < num_vfs; vf_index++) {
> +             int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index);
> +             int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
> +             struct pci_dn *vf_pdn;
> +
>               if (pdn->m64_single_mode)
>                       pe_num = pdn->pe_num_map[vf_index];
>               else
> @@ -1570,13 +1574,11 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev 
> *pdev, u16 num_vfs)
>               pe->pbus = NULL;
>               pe->parent_dev = pdev;
>               pe->mve_number = -1;
> -             pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) |
> -                        pci_iov_virtfn_devfn(pdev, vf_index);
> +             pe->rid = (vf_bus << 8) | vf_devfn;
>  
>               pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
>                       hose->global_number, pdev->bus->number,
> -                     PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)),
> -                     PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num);
> +                     PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);
>  
>               if (pnv_ioda_configure_pe(phb, pe)) {
>                       /* XXX What do we do here ? */
> @@ -1590,6 +1592,15 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, 
> u16 num_vfs)
>               list_add_tail(&pe->list, &phb->ioda.pe_list);
>               mutex_unlock(&phb->ioda.pe_list_mutex);
>  
> +             /* associate this pe to it's pdn */
> +             list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) {
> +                     if (vf_pdn->busno == vf_bus &&
> +                         vf_pdn->devfn == vf_devfn) {
> +                             vf_pdn->pe_number = pe_num;
> +                             break;
> +                     }
> +             }
> +
>               pnv_pci_ioda2_setup_dma_pe(phb, pe);
>  #ifdef CONFIG_IOMMU_API
>               iommu_register_group(&pe->table_group,
> diff --git a/arch/powerpc/platforms/powernv/pci.c 
> b/arch/powerpc/platforms/powernv/pci.c
> index 2825d00..b7761e2 100644
> --- a/arch/powerpc/platforms/powernv/pci.c
> +++ b/arch/powerpc/platforms/powernv/pci.c
> @@ -816,16 +816,12 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
>       struct pnv_phb *phb = hose->private_data;
>  #ifdef CONFIG_PCI_IOV
>       struct pnv_ioda_pe *pe;
> -     struct pci_dn *pdn;
>  
>       /* Fix the VF pdn PE number */
>       if (pdev->is_virtfn) {
> -             pdn = pci_get_pdn(pdev);
> -             WARN_ON(pdn->pe_number != IODA_INVALID_PE);
>               list_for_each_entry(pe, &phb->ioda.pe_list, list) {
>                       if (pe->rid == ((pdev->bus->number << 8) |
>                           (pdev->devfn & 0xff))) {
> -                             pdn->pe_number = pe->pe_number;
>                               pe->pdev = pdev;
>                               break;
>                       }
> 

-- 
Alexey

Reply via email to