On Wed, Jul 16, 2014 at 08:24:30PM +0800, Guo Chao wrote:
>This patch enables the M64 aperture for PHB3.
>
>We already have a platform hook (ppc_md.pcibios_window_alignment) to influence
>the PCI resource assignment done in the PCI core, so that each PE's M32
>resources are assigned on the basis of the M32 segment size. Similarly, we use
>that hook for M64 assignment on the basis of the M64 segment size.
>
>   * We use the last M64 BAR to cover the M64 aperture; it is shared by all
>     256 PEs.
>   * We don't support P7IOC yet. However, some function callbacks are added
>     to (struct pnv_phb) so that we can reuse them on P7IOC in the future.
>   * A PE corresponding to a PCI bus with a large M64 BAR device attached
>     might span multiple M64 segments. We introduce a "compound" PE to cover
>     this case. A compound PE is a list of PEs; the master PE is used as
>     before, and the slave PEs exist only for MMIO isolation.
>
>Signed-off-by: Guo Chao <y...@linux.vnet.ibm.com>

Reviewed-by: Gavin Shan <gws...@linux.vnet.ibm.com>

It looks good to me except for the PELTV bits (as I mentioned before), which
can be fixed later. Without PELTV we can rely on software to maintain the
master/slave relationship. However, it's worthwhile to have the PELTV set up
correctly so that inbound ER errors can freeze multiple PEs (where applicable)
in hardware.
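
Something along these lines is what I have in mind -- an untested sketch meant
to sit in pci-ioda.c and be called once the master/slave lists are built in
pnv_ioda2_pick_m64_pe(). opal_pci_set_peltv() and OPAL_ADD_PE_TO_DOMAIN are the
existing OPAL interfaces; the helper name and its placement are only
illustrative:

    /*
     * Reflect the compound PE in the hardware PELTV so that an inbound
     * ER error on the master PE also freezes its slave PEs, instead of
     * relying on software to propagate the frozen state.
     */
    static void pnv_ioda_set_peltv_for_compound(struct pnv_phb *phb,
                                                struct pnv_ioda_pe *master_pe)
    {
            struct pnv_ioda_pe *slave;
            s64 rc;

            list_for_each_entry(slave, &master_pe->slaves, list) {
                    rc = opal_pci_set_peltv(phb->opal_id,
                                            master_pe->pe_number,
                                            slave->pe_number,
                                            OPAL_ADD_PE_TO_DOMAIN);
                    if (rc != OPAL_SUCCESS)
                            pr_warn("%s: Failed to add PE#%d to PELTV of PE#%d\n",
                                    __func__, slave->pe_number,
                                    master_pe->pe_number);
            }
    }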

Also, I ran it on a P7 box and no problems were found there.

Thanks,
Gavin

>---
> arch/powerpc/include/asm/opal.h           |   8 +-
> arch/powerpc/platforms/powernv/pci-ioda.c | 301 +++++++++++++++++++++++++++---
> arch/powerpc/platforms/powernv/pci.h      |  20 ++
> 3 files changed, 307 insertions(+), 22 deletions(-)
>
>diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
>index 0da1dbd..ae885cc 100644
>--- a/arch/powerpc/include/asm/opal.h
>+++ b/arch/powerpc/include/asm/opal.h
>@@ -340,6 +340,12 @@ enum OpalMveEnableAction {
>       OPAL_ENABLE_MVE = 1
> };
>
>+enum OpalM64EnableAction {
>+      OPAL_DISABLE_M64 = 0,
>+      OPAL_ENABLE_M64_SPLIT = 1,
>+      OPAL_ENABLE_M64_NON_SPLIT = 2
>+};
>+
> enum OpalPciResetScope {
>       OPAL_PHB_COMPLETE = 1, OPAL_PCI_LINK = 2, OPAL_PHB_ERROR = 3,
>       OPAL_PCI_HOT_RESET = 4, OPAL_PCI_FUNDAMENTAL_RESET = 5,
>@@ -768,7 +774,7 @@ int64_t opal_pci_set_phb_mem_window(uint64_t phb_id, uint16_t window_type,
>                                   uint16_t window_num,
>                                   uint64_t starting_real_address,
>                                   uint64_t starting_pci_address,
>-                                  uint16_t segment_size);
>+                                  uint64_t size);
> int64_t opal_pci_map_pe_mmio_window(uint64_t phb_id, uint16_t pe_number,
>                                   uint16_t window_type, uint16_t window_num,
>                                   uint16_t segment_num);
>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>index 93fd815..2b659d9 100644
>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>@@ -36,6 +36,7 @@
> #include <asm/tce.h>
> #include <asm/xics.h>
> #include <asm/debug.h>
>+#include <asm/firmware.h>
>
> #include "powernv.h"
> #include "pci.h"
>@@ -82,6 +83,12 @@ static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
>               : : "r" (val), "r" (paddr) : "memory");
> }
>
>+static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
>+{
>+      return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
>+              (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
>+}
>+
> static int pnv_ioda_alloc_pe(struct pnv_phb *phb)
> {
>       unsigned long pe;
>@@ -106,6 +113,240 @@ static void pnv_ioda_free_pe(struct pnv_phb *phb, int pe)
>       clear_bit(pe, phb->ioda.pe_alloc);
> }
>
>+/* The default M64 BAR is shared by all PEs */
>+static int pnv_ioda2_init_m64(struct pnv_phb *phb)
>+{
>+      const char *desc;
>+      struct resource *r;
>+      s64 rc;
>+
>+      /* Configure the default M64 BAR */
>+      rc = opal_pci_set_phb_mem_window(phb->opal_id,
>+                                       OPAL_M64_WINDOW_TYPE,
>+                                       phb->ioda.m64_bar_idx,
>+                                       phb->ioda.m64_base,
>+                                       0, /* unused */
>+                                       phb->ioda.m64_size);
>+      if (rc != OPAL_SUCCESS) {
>+              desc = "configuring";
>+              goto fail;
>+      }
>+
>+      /* Enable the default M64 BAR */
>+      rc = opal_pci_phb_mmio_enable(phb->opal_id,
>+                                    OPAL_M64_WINDOW_TYPE,
>+                                    phb->ioda.m64_bar_idx,
>+                                    OPAL_ENABLE_M64_SPLIT);
>+      if (rc != OPAL_SUCCESS) {
>+              desc = "enabling";
>+              goto fail;
>+      }
>+
>+      /* Mark the M64 BAR assigned */
>+      set_bit(phb->ioda.m64_bar_idx, &phb->ioda.m64_bar_alloc);
>+
>+      /*
>+       * Strip off the segment used by the reserved PE, which is
>+       * expected to be 0 or the last one of the PE capacity.
>+       */
>+      r = &phb->hose->mem_resources[1];
>+      if (phb->ioda.reserved_pe == 0)
>+              r->start += phb->ioda.m64_segsize;
>+      else if (phb->ioda.reserved_pe == (phb->ioda.total_pe - 1))
>+              r->end -= phb->ioda.m64_segsize;
>+      else
>+              pr_warn("  Cannot strip M64 segment for reserved PE#%d\n",
>+                      phb->ioda.reserved_pe);
>+
>+      return 0;
>+
>+fail:
>+      pr_warn("  Failure %lld %s M64 BAR#%d\n",
>+              rc, desc, phb->ioda.m64_bar_idx);
>+      opal_pci_phb_mmio_enable(phb->opal_id,
>+                               OPAL_M64_WINDOW_TYPE,
>+                               phb->ioda.m64_bar_idx,
>+                               OPAL_DISABLE_M64);
>+      return -EIO;
>+}
>+
>+static void pnv_ioda2_alloc_m64_pe(struct pnv_phb *phb)
>+{
>+      resource_size_t sgsz = phb->ioda.m64_segsize;
>+      struct pci_dev *pdev;
>+      struct resource *r;
>+      int base, step, i;
>+
>+      /*
>+       * The root bus always covers the full M64 range, while the root
>+       * ports reflect the M64 range actually in use. So we check the
>+       * root ports instead of the root bus.
>+       */
>+      list_for_each_entry(pdev, &phb->hose->bus->devices, bus_list) {
>+              for (i = PCI_BRIDGE_RESOURCES;
>+                   i <= PCI_BRIDGE_RESOURCE_END; i++) {
>+                      r = &pdev->resource[i];
>+                      if (!r->parent ||
>+                          !pnv_pci_is_mem_pref_64(r->flags))
>+                              continue;
>+
>+                      base = (r->start - phb->ioda.m64_base) / sgsz;
>+                      for (step = 0; step < resource_size(r) / sgsz; step++)
>+                              set_bit(base + step, phb->ioda.pe_alloc);
>+              }
>+      }
>+}
>+
>+static int pnv_ioda2_pick_m64_pe(struct pnv_phb *phb,
>+                               struct pci_bus *bus, int all)
>+{
>+      resource_size_t segsz = phb->ioda.m64_segsize;
>+      struct pci_dev *pdev;
>+      struct resource *r;
>+      struct pnv_ioda_pe *master_pe, *pe;
>+      unsigned long size, *pe_alloc;
>+      bool found;
>+      int start, i, j;
>+
>+      /* Root bus shouldn't use M64 */
>+      if (pci_is_root_bus(bus))
>+              return IODA_INVALID_PE;
>+
>+      /* We support only one M64 window on each bus */
>+      found = false;
>+      pci_bus_for_each_resource(bus, r, i) {
>+              if (r && r->parent &&
>+                  pnv_pci_is_mem_pref_64(r->flags)) {
>+                      found = true;
>+                      break;
>+              }
>+      }
>+
>+      /* No M64 window found ? */
>+      if (!found)
>+              return IODA_INVALID_PE;
>+
>+      /* Allocate bitmap */
>+      size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
>+      pe_alloc = kzalloc(size, GFP_KERNEL);
>+      if (!pe_alloc) {
>+              pr_warn("%s: Out of memory !\n",
>+                      __func__);
>+              return IODA_INVALID_PE;
>+      }
>+
>+      /*
>+       * Figure out the PE numbers reserved for this PE and
>+       * its child PEs.
>+       */
>+      start = (r->start - phb->ioda.m64_base) / segsz;
>+      for (i = 0; i < resource_size(r) / segsz; i++)
>+              set_bit(start + i, pe_alloc);
>+
>+      if (all)
>+              goto done;
>+
>+      /*
>+       * If the PE doesn't cover all subordinate buses, we need
>+       * to subtract the PEs reserved for the child buses.
>+       */
>+      list_for_each_entry(pdev, &bus->devices, bus_list) {
>+              if (!pdev->subordinate)
>+                      continue;
>+
>+              pci_bus_for_each_resource(pdev->subordinate, r, i) {
>+                      if (!r || !r->parent ||
>+                          !pnv_pci_is_mem_pref_64(r->flags))
>+                              continue;
>+
>+                      start = (r->start - phb->ioda.m64_base) / segsz;
>+                      for (j = 0; j < resource_size(r) / segsz ; j++)
>+                              clear_bit(start + j, pe_alloc);
>+                }
>+        }
>+
>+      /*
>+       * The current bus might not own the M64 window at all; it may be
>+       * contributed entirely by its child buses. In that case, we
>+       * needn't pick an M64-dependent PE#.
>+       */
>+      if (bitmap_empty(pe_alloc, phb->ioda.total_pe)) {
>+              kfree(pe_alloc);
>+              return IODA_INVALID_PE;
>+      }
>+
>+      /*
>+       * Figure out the master PE and put all slave PEs to master
>+       * PE's list to form compound PE.
>+       */
>+done:
>+      master_pe = NULL;
>+      i = -1;
>+      while ((i = find_next_bit(pe_alloc, phb->ioda.total_pe, i + 1)) <
>+              phb->ioda.total_pe) {
>+              pe = &phb->ioda.pe_array[i];
>+              pe->phb = phb;
>+              pe->pe_number = i;
>+
>+              if (!master_pe) {
>+                      pe->flags |= PNV_IODA_PE_MASTER;
>+                      INIT_LIST_HEAD(&pe->slaves);
>+                      master_pe = pe;
>+              } else {
>+                      pe->flags |= PNV_IODA_PE_SLAVE;
>+                      pe->master = master_pe;
>+                      list_add_tail(&pe->list, &master_pe->slaves);
>+              }
>+      }
>+
>+      kfree(pe_alloc);
>+      return master_pe->pe_number;
>+}
>+
>+static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
>+{
>+      struct pci_controller *hose = phb->hose;
>+      struct device_node *dn = hose->dn;
>+      struct resource *res;
>+      const u32 *r;
>+      u64 pci_addr;
>+
>+      if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
>+              pr_info("  Firmware too old to support M64 window\n");
>+              return;
>+      }
>+
>+      r = of_get_property(dn, "ibm,opal-m64-window", NULL);
>+      if (!r) {
>+              pr_info("  No <ibm,opal-m64-window> on %s\n",
>+                      dn->full_name);
>+              return;
>+      }
>+
>+      /* FIXME: Support M64 for P7IOC */
>+      if (phb->type != PNV_PHB_IODA2) {
>+              pr_info("  M64 window not supported\n");
>+              return;
>+      }
>+
>+      res = &hose->mem_resources[1];
>+      res->start = of_translate_address(dn, r + 2);
>+      res->end = res->start + of_read_number(r + 4, 2) - 1;
>+      res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
>+      pci_addr = of_read_number(r, 2);
>+      hose->mem_offset[1] = res->start - pci_addr;
>+
>+      phb->ioda.m64_size = resource_size(res);
>+      phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe;
>+      phb->ioda.m64_base = pci_addr;
>+
>+      /* Use last M64 BAR to cover M64 window */
>+      phb->ioda.m64_bar_idx = 15;
>+      phb->init_m64 = pnv_ioda2_init_m64;
>+      phb->alloc_m64_pe = pnv_ioda2_alloc_m64_pe;
>+      phb->pick_m64_pe = pnv_ioda2_pick_m64_pe;
>+}
>+
> /* Currently those 2 are only used when MSIs are enabled, this will change
>  * but in the meantime, we need to protect them to avoid warnings
>  */
>@@ -363,9 +604,16 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
>       struct pci_controller *hose = pci_bus_to_host(bus);
>       struct pnv_phb *phb = hose->private_data;
>       struct pnv_ioda_pe *pe;
>-      int pe_num;
>+      int pe_num = IODA_INVALID_PE;
>+
>+      /* Check if PE is determined by M64 */
>+      if (phb->pick_m64_pe)
>+              pe_num = phb->pick_m64_pe(phb, bus, all);
>+
>+      /* The PE number isn't pinned by M64 */
>+      if (pe_num == IODA_INVALID_PE)
>+              pe_num = pnv_ioda_alloc_pe(phb);
>
>-      pe_num = pnv_ioda_alloc_pe(phb);
>       if (pe_num == IODA_INVALID_PE) {
>               pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n",
>                       __func__, pci_domain_nr(bus), bus->number);
>@@ -373,7 +621,7 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all)
>       }
>
>       pe = &phb->ioda.pe_array[pe_num];
>-      pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
>+      pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
>       pe->pbus = bus;
>       pe->pdev = NULL;
>       pe->tce32_seg = -1;
>@@ -441,8 +689,15 @@ static void pnv_ioda_setup_PEs(struct pci_bus *bus)
> static void pnv_pci_ioda_setup_PEs(void)
> {
>       struct pci_controller *hose, *tmp;
>+      struct pnv_phb *phb;
>
>       list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
>+              phb = hose->private_data;
>+
>+              /* M64 layout might affect PE allocation */
>+              if (phb->alloc_m64_pe)
>+                      phb->alloc_m64_pe(phb);
>+
>               pnv_ioda_setup_PEs(hose->bus);
>       }
> }
>@@ -1067,9 +1322,6 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
>                               index++;
>                       }
>               } else if (res->flags & IORESOURCE_MEM) {
>-                      /* WARNING: Assumes M32 is mem region 0 in PHB. We need to
>-                       * harden that algorithm when we start supporting M64
>-                       */
>                       region.start = res->start -
>                                      hose->mem_offset[0] -
>                                      phb->ioda.m32_pci_base;
>@@ -1190,7 +1442,10 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
>               bridge = bridge->bus->self;
>       }
>
>-      /* We need support prefetchable memory window later */
>+      /* We fall back to M32 if M64 isn't supported */
>+      if (phb->ioda.m64_segsize &&
>+          pnv_pci_is_mem_pref_64(type))
>+              return phb->ioda.m64_segsize;
>       if (type & IORESOURCE_MEM)
>               return phb->ioda.m32_segsize;
>
>@@ -1311,6 +1566,10 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
>       prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
>       if (prop32)
>               phb->ioda.reserved_pe = be32_to_cpup(prop32);
>+
>+      /* Parse 64-bit MMIO range */
>+      pnv_ioda_parse_m64_window(phb);
>+
>       phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
>       /* FW Has already off top 64k of M32 space (MSI space) */
>       phb->ioda.m32_size += 0x10000;
>@@ -1346,14 +1605,6 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
>       /* Calculate how many 32-bit TCE segments we have */
>       phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
>
>-      /* Clear unusable m64 */
>-      hose->mem_resources[1].flags = 0;
>-      hose->mem_resources[1].start = 0;
>-      hose->mem_resources[1].end = 0;
>-      hose->mem_resources[2].flags = 0;
>-      hose->mem_resources[2].start = 0;
>-      hose->mem_resources[2].end = 0;
>-
> #if 0 /* We should really do that ... */
>       rc = opal_pci_set_phb_mem_window(opal->phb_id,
>                                        window_type,
>@@ -1363,12 +1614,16 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
>                                        segment_size);
> #endif
>
>-      pr_info("  %d (%d) PE's M32: 0x%x [segment=0x%x]"
>-              " IO: 0x%x [segment=0x%x]\n",
>-              phb->ioda.total_pe,
>-              phb->ioda.reserved_pe,
>-              phb->ioda.m32_size, phb->ioda.m32_segsize,
>-              phb->ioda.io_size, phb->ioda.io_segsize);
>+      pr_info("  %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n",
>+              phb->ioda.total_pe, phb->ioda.reserved_pe,
>+              phb->ioda.m32_size, phb->ioda.m32_segsize);
>+      if (phb->ioda.m64_size)
>+              pr_info("                 M64: 0x%lx [segment=0x%lx]\n",
>+                      phb->ioda.m64_size, phb->ioda.m64_segsize);
>+      if (phb->ioda.io_size)
>+              pr_info("                  IO: 0x%x [segment=0x%x]\n",
>+                      phb->ioda.io_size, phb->ioda.io_segsize);
>+
>
>       phb->hose->ops = &pnv_pci_ops;
> #ifdef CONFIG_EEH
>@@ -1416,6 +1671,10 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np,
>               ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
>               ioda_eeh_phb_reset(hose, OPAL_DEASSERT_RESET);
>       }
>+
>+      /* Configure M64 window */
>+      if (phb->init_m64 && phb->init_m64(phb))
>+              hose->mem_resources[1].flags = 0;
> }
>
> void __init pnv_pci_init_ioda2_phb(struct device_node *np)
>diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
>index 676232c..def7171 100644
>--- a/arch/powerpc/platforms/powernv/pci.h
>+++ b/arch/powerpc/platforms/powernv/pci.h
>@@ -21,6 +21,8 @@ enum pnv_phb_model {
> #define PNV_IODA_PE_DEV               (1 << 0)        /* PE has single PCI device     */
> #define PNV_IODA_PE_BUS               (1 << 1)        /* PE has primary PCI bus       */
> #define PNV_IODA_PE_BUS_ALL   (1 << 2)        /* PE has subordinate buses     */
>+#define PNV_IODA_PE_MASTER    (1 << 3)        /* Master PE in compound case   */
>+#define PNV_IODA_PE_SLAVE     (1 << 4)        /* Slave PE in compound case    */
>
> /* Data associated with a PE, including IOMMU tracking etc.. */
> struct pnv_phb;
>@@ -64,6 +66,10 @@ struct pnv_ioda_pe {
>        */
>       int                     mve_number;
>
>+      /* PEs in compound case */
>+      struct pnv_ioda_pe      *master;
>+      struct list_head        slaves;
>+
>       /* Link in list of PE#s */
>       struct list_head        dma_link;
>       struct list_head        list;
>@@ -119,6 +125,9 @@ struct pnv_phb {
>       void (*fixup_phb)(struct pci_controller *hose);
>       u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
>       void (*shutdown)(struct pnv_phb *phb);
>+      int (*init_m64)(struct pnv_phb *phb);
>+      void (*alloc_m64_pe)(struct pnv_phb *phb);
>+      int (*pick_m64_pe)(struct pnv_phb *phb, struct pci_bus *bus, int all);
>
>       union {
>               struct {
>@@ -129,9 +138,20 @@ struct pnv_phb {
>                       /* Global bridge info */
>                       unsigned int            total_pe;
>                       unsigned int            reserved_pe;
>+
>+                      /* 32-bit MMIO window */
>                       unsigned int            m32_size;
>                       unsigned int            m32_segsize;
>                       unsigned int            m32_pci_base;
>+
>+                      /* 64-bit MMIO window */
>+                      unsigned int            m64_bar_idx;
>+                      unsigned long           m64_size;
>+                      unsigned long           m64_segsize;
>+                      unsigned long           m64_base;
>+                      unsigned long           m64_bar_alloc;
>+
>+                      /* IO ports */
>                       unsigned int            io_size;
>                       unsigned int            io_segsize;
>                       unsigned int            io_pci_base;
>-- 
>1.9.1
>

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev
