powernv: ocxl move TL definition

christophe lombard Mon, 14 Oct 2019 08:33:41 -0700

On 14/10/2019 12:21, Frederic Barrat wrote:



Le 09/10/2019 à 17:11, christophe lombard a écrit :

Specifies the templates in the Transaction Layer that the OpenCAPI device/host

support when transmitting/receiving DL/DLX frames to or from the OpenCAPI
device/host.

Update, rename and create a few new platform-specific calls which can be used by

drivers.

No functional change.

Signed-off-by: Christophe Lombard <clomb...@linux.vnet.ibm.com>
---
  arch/powerpc/include/asm/pnv-ocxl.h   |   5 +-
  arch/powerpc/platforms/powernv/ocxl.c | 103 ++++++++++++++++++++++++--
  drivers/misc/ocxl/config.c            |  89 +---------------------
  3 files changed, 99 insertions(+), 98 deletions(-)

diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h

index 8e516e339e6c..b8c68878b4ba 100644
--- a/arch/powerpc/include/asm/pnv-ocxl.h
+++ b/arch/powerpc/include/asm/pnv-ocxl.h

@@ -13,10 +13,7 @@ extern int pnv_ocxl_get_actag(struct pci_dev *dev,u16 *base, u16 *enabled,

              u16 *supported);
  extern int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count);

-extern int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
-            char *rate_buf, int rate_buf_size);
-extern int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
-            uint64_t rate_buf_phys, int rate_buf_size);
+extern int pnv_ocxl_set_TL(struct pci_dev *dev, int tl_dvsec);

  extern int pnv_ocxl_platform_setup(struct pci_dev *dev,
                     int PE_mask, int *hwirq,

diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c

index 4d26cba12b63..351324cffc2b 100644
--- a/arch/powerpc/platforms/powernv/ocxl.c
+++ b/arch/powerpc/platforms/powernv/ocxl.c

@@ -369,8 +369,8 @@ static void set_templ_rate(unsigned int templ,unsigned int rate, char *buf)

      buf[idx] |= rate << shift;
  }

-int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
-            char *rate_buf, int rate_buf_size)
+static int get_tl_cap(struct pci_dev *dev, long *cap,
+              char *rate_buf, int rate_buf_size)
  {
      if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
          return -EINVAL;

@@ -390,10 +390,9 @@ int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long*cap,

      *cap = PNV_OCXL_TL_P9_RECV_CAP;
      return 0;
  }
-EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);

-int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
-            uint64_t rate_buf_phys, int rate_buf_size)
+static int set_tl_conf(struct pci_dev *dev, long cap,
+               uint64_t rate_buf_phys, int rate_buf_size)
  {
      struct pci_controller *hose = pci_bus_to_host(dev->bus);
      struct pnv_phb *phb = hose->private_data;

@@ -410,7 +409,99 @@ int pnv_ocxl_set_tl_conf(struct pci_dev *dev,long cap,

      }
      return 0;
  }
-EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
+
+int pnv_ocxl_set_TL(struct pci_dev *dev, int tl_dvsec)
+{
+    u32 val;
+    __be32 *be32ptr;
+    u8 timers;
+    int i, rc;
+    long recv_cap;
+    char *recv_rate;
+
+    recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL);
+    if (!recv_rate)
+        return -ENOMEM;
+    /*
+     * The spec defines 64 templates for messages in the
+     * Transaction Layer (TL).
+     *
+     * The host and device each support a subset, so we need to
+     * configure the transmitters on each side to send only
+     * templates the receiver understands, at a rate the receiver
+     * can process.  Per the spec, template 0 must be supported by
+     * everybody. That's the template which has been used by the
+     * host and device so far.
+     *
+     * The sending rate limit must be set before the template is
+     * enabled.
+     */
+
+    /*
+     * Device -> host
+     */
+    rc = get_tl_cap(dev, &recv_cap, recv_rate,
+            PNV_OCXL_TL_RATE_BUF_SIZE);
+    if (rc)
+        goto out;
+
+    for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
+        be32ptr = (__be32 *) &recv_rate[i];
+        pci_write_config_dword(dev,
+                tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i,
+                be32_to_cpu(*be32ptr));
+    }
+    val = recv_cap >> 32;
+    pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val);
+    val = recv_cap & GENMASK(31, 0);

+ pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP +4, val);

+
+    /*
+     * Host -> device
+     */
+    for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
+        pci_read_config_dword(dev,
+                tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i,
+                &val);
+        be32ptr = (__be32 *) &recv_rate[i];
+        *be32ptr = cpu_to_be32(val);
+    }
+    pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val);
+    recv_cap = (long) val << 32;

+ pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4,&val);

+    recv_cap |= val;
+
+    rc = set_tl_conf(dev, recv_cap, __pa(recv_rate),
+             PNV_OCXL_TL_RATE_BUF_SIZE);
+    if (rc)
+        goto out;
+
+    /*
+     * Opencapi commands needing to be retried are classified per
+     * the TL in 2 groups: short and long commands.
+     *
+     * The short back off timer it not used for now. It will be
+     * for opencapi 4.0.
+     *
+     * The long back off timer is typically used when an AFU hits
+     * a page fault but the NPU is already processing one. So the
+     * AFU needs to wait before it can resubmit. Having a value
+     * too low doesn't break anything, but can generate extra
+     * traffic on the link.
+     * We set it to 1.6 us for now. It's shorter than, but in the
+     * same order of magnitude as the time spent to process a page
+     * fault.
+     */
+    timers = 0x2 << 4; /* long timer = 1.6 us */
+    pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS,
+            timers);
+

How does it work in the virtualized case? We would also need to do those config space reads and writes. I'm guessing it's all handled in the host behind a hcall, as we don't really want to have the guest mess with the link configuration?


A specific option (H_CONFIG_ADAPTER_SET_TL) through the hcall
H_OCXL_CONFIG_ADAPTER allows the guest to call pnv_ocxl_set_TL(), like
the ocxl driver running on the host.
All the new pnv_* APIs have been created to configure and handle the capi
device for the ocxl driver (running on the host) and for the guest,
through a new vfio driver. This new vfio driver will be in charge,
according to the hcall options, of calling the right API.

   Fred

+    rc = 0;
+out:
+    kfree(recv_rate);
+    return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_set_TL);

  static int get_xsl_irq(struct pci_dev *dev, int *hwirq)
  {
diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index c8e19bfb5ef9..7ca0f6744125 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -709,100 +709,13 @@ EXPORT_SYMBOL_GPL(ocxl_config_set_afu_state);

  int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec)
  {
-    u32 val;
-    __be32 *be32ptr;
-    u8 timers;
-    int i, rc;
-    long recv_cap;
-    char *recv_rate;
-
      /*
       * Skip on function != 0, as the TL can only be defined on 0
       */
      if (PCI_FUNC(dev->devfn) != 0)
          return 0;

-    recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL);
-    if (!recv_rate)
-        return -ENOMEM;
-    /*
-     * The spec defines 64 templates for messages in the
-     * Transaction Layer (TL).
-     *
-     * The host and device each support a subset, so we need to
-     * configure the transmitters on each side to send only
-     * templates the receiver understands, at a rate the receiver
-     * can process.  Per the spec, template 0 must be supported by
-     * everybody. That's the template which has been used by the
-     * host and device so far.
-     *
-     * The sending rate limit must be set before the template is
-     * enabled.
-     */
-
-    /*
-     * Device -> host
-     */
-    rc = pnv_ocxl_get_tl_cap(dev, &recv_cap, recv_rate,
-                PNV_OCXL_TL_RATE_BUF_SIZE);
-    if (rc)
-        goto out;
-
-    for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
-        be32ptr = (__be32 *) &recv_rate[i];
-        pci_write_config_dword(dev,
-                tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i,
-                be32_to_cpu(*be32ptr));
-    }
-    val = recv_cap >> 32;
-    pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val);
-    val = recv_cap & GENMASK(31, 0);

- pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP +4, val);

-
-    /*
-     * Host -> device
-     */
-    for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
-        pci_read_config_dword(dev,
-                tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i,
-                &val);
-        be32ptr = (__be32 *) &recv_rate[i];
-        *be32ptr = cpu_to_be32(val);
-    }
-    pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val);
-    recv_cap = (long) val << 32;

- pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4,&val);

-    recv_cap |= val;
-
-    rc = pnv_ocxl_set_tl_conf(dev, recv_cap, __pa(recv_rate),
-                PNV_OCXL_TL_RATE_BUF_SIZE);
-    if (rc)
-        goto out;
-
-    /*
-     * Opencapi commands needing to be retried are classified per
-     * the TL in 2 groups: short and long commands.
-     *
-     * The short back off timer it not used for now. It will be
-     * for opencapi 4.0.
-     *
-     * The long back off timer is typically used when an AFU hits
-     * a page fault but the NPU is already processing one. So the
-     * AFU needs to wait before it can resubmit. Having a value
-     * too low doesn't break anything, but can generate extra
-     * traffic on the link.
-     * We set it to 1.6 us for now. It's shorter than, but in the
-     * same order of magnitude as the time spent to process a page
-     * fault.
-     */
-    timers = 0x2 << 4; /* long timer = 1.6 us */
-    pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS,
-            timers);
-
-    rc = 0;
-out:
-    kfree(recv_rate);
-    return rc;
+    return pnv_ocxl_set_TL(dev, tl_dvsec);
  }
  EXPORT_SYMBOL_GPL(ocxl_config_set_TL);

Re: [PATCH 2/2] powerpc/powernv: ocxl move TL definition

Reply via email to