On Fri, Feb 26, 2021 at 07:01:05AM -0800, Tom Rix wrote:
> A question i do not know the answer to.
> 
> Seems like 'golden' is linked to a manufacturing (diagnostics?) image.

>From my brief history with Xilinx Ultrascale+ PCI cards I recall the golden
image being a sort of known good recovery image.

If we can't tell it should probably be explained better :)
> 
> If the public will never see it, should handling it here be done ?

Yes. We do want people to run their entire stack using mainline linux,
not just a part of it, if code is needed to get from recovery image to
full image or similar, then we should support that.

> Moritz, do you know ?
> 
> 
> On 2/17/21 10:40 PM, Lizhi Hou wrote:
> > The PCIE device driver which attaches to management function on Alveo
> to the management
> > devices. It instantiates one or more partition drivers which in turn
> more fpga partition / group ?
> > instantiate platform drivers. The instantiation of partition and platform
> > drivers is completely data driven.
> data driven ? everything is data driven.  do you mean dtb driven ?
> >
> > Signed-off-by: Sonal Santan <sonal.san...@xilinx.com>
> > Signed-off-by: Max Zhen <max.z...@xilinx.com>
> > Signed-off-by: Lizhi Hou <liz...@xilinx.com>
> > ---
> >  drivers/fpga/xrt/include/xroot.h | 114 +++++++++++
> >  drivers/fpga/xrt/mgmt/root.c     | 342 +++++++++++++++++++++++++++++++
> >  2 files changed, 456 insertions(+)
> >  create mode 100644 drivers/fpga/xrt/include/xroot.h
> >  create mode 100644 drivers/fpga/xrt/mgmt/root.c
> >
> > diff --git a/drivers/fpga/xrt/include/xroot.h 
> > b/drivers/fpga/xrt/include/xroot.h
> > new file mode 100644
> > index 000000000000..752e10daa85e
> > --- /dev/null
> > +++ b/drivers/fpga/xrt/include/xroot.h
> > @@ -0,0 +1,114 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/*
> > + * Header file for Xilinx Runtime (XRT) driver
> > + *
> > + * Copyright (C) 2020-2021 Xilinx, Inc.
> > + *
> > + * Authors:
> > + * Cheng Zhen <m...@xilinx.com>
> > + */
> > +
> > +#ifndef _XRT_ROOT_H_
> > +#define _XRT_ROOT_H_
> > +
> > +#include <linux/pci.h>
> > +#include "subdev_id.h"
> > +#include "events.h"
> > +
> > +typedef bool (*xrt_subdev_match_t)(enum xrt_subdev_id,
> > +   struct platform_device *, void *);
> > +#define XRT_SUBDEV_MATCH_PREV      ((xrt_subdev_match_t)-1)
> > +#define XRT_SUBDEV_MATCH_NEXT      ((xrt_subdev_match_t)-2)
> > +
> > +/*
> > + * Root IOCTL calls.
> > + */
> > +enum xrt_root_ioctl_cmd {
> > +   /* Leaf actions. */
> > +   XRT_ROOT_GET_LEAF = 0,
> > +   XRT_ROOT_PUT_LEAF,
> > +   XRT_ROOT_GET_LEAF_HOLDERS,
> > +
> > +   /* Group actions. */
> > +   XRT_ROOT_CREATE_GROUP,
> > +   XRT_ROOT_REMOVE_GROUP,
> > +   XRT_ROOT_LOOKUP_GROUP,
> > +   XRT_ROOT_WAIT_GROUP_BRINGUP,
> > +
> > +   /* Event actions. */
> > +   XRT_ROOT_EVENT,
> should this be XRT_ROOT_EVENT_SYNC ?
> > +   XRT_ROOT_EVENT_ASYNC,
> > +
> > +   /* Device info. */
> > +   XRT_ROOT_GET_RESOURCE,
> > +   XRT_ROOT_GET_ID,
> > +
> > +   /* Misc. */
> > +   XRT_ROOT_HOT_RESET,
> > +   XRT_ROOT_HWMON,
> > +};
> > +
> > +struct xrt_root_ioctl_get_leaf {
> > +   struct platform_device *xpigl_pdev; /* caller's pdev */
> xpigl_ ? unneeded suffix in element names
> > +   xrt_subdev_match_t xpigl_match_cb;
> > +   void *xpigl_match_arg;
> > +   struct platform_device *xpigl_leaf; /* target leaf pdev */
> > +};
> > +
> > +struct xrt_root_ioctl_put_leaf {
> > +   struct platform_device *xpipl_pdev; /* caller's pdev */
> > +   struct platform_device *xpipl_leaf; /* target's pdev */
> 
> caller_pdev;
> 
> target_pdev;
> 
> > +};
> > +
> > +struct xrt_root_ioctl_lookup_group {
> > +   struct platform_device *xpilp_pdev; /* caller's pdev */
> > +   xrt_subdev_match_t xpilp_match_cb;
> > +   void *xpilp_match_arg;
> > +   int xpilp_grp_inst;
> > +};
> > +
> > +struct xrt_root_ioctl_get_holders {
> > +   struct platform_device *xpigh_pdev; /* caller's pdev */
> > +   char *xpigh_holder_buf;
> > +   size_t xpigh_holder_buf_len;
> > +};
> > +
> > +struct xrt_root_ioctl_get_res {
> > +   struct resource *xpigr_res;
> > +};
> > +
> > +struct xrt_root_ioctl_get_id {
> > +   unsigned short  xpigi_vendor_id;
> > +   unsigned short  xpigi_device_id;
> > +   unsigned short  xpigi_sub_vendor_id;
> > +   unsigned short  xpigi_sub_device_id;
> > +};
> > +
> > +struct xrt_root_ioctl_hwmon {
> > +   bool xpih_register;
> > +   const char *xpih_name;
> > +   void *xpih_drvdata;
> > +   const struct attribute_group **xpih_groups;
> > +   struct device *xpih_hwmon_dev;
> > +};
> > +
> > +typedef int (*xrt_subdev_root_cb_t)(struct device *, void *, u32, void *);
> This function pointer type is important, please add a comment about its use 
> and expected parameters
> > +int xrt_subdev_root_request(struct platform_device *self, u32 cmd, void 
> > *arg);
> > +
> > +/*
> > + * Defines physical function (MPF / UPF) specific operations
> > + * needed in common root driver.
> > + */
> > +struct xroot_pf_cb {
> > +   void (*xpc_hot_reset)(struct pci_dev *pdev);
> This is only ever set to xmgmt_root_hot_reset, why is this abstraction needed 
> ?
> > +};
> > +
> > +int xroot_probe(struct pci_dev *pdev, struct xroot_pf_cb *cb, void **root);
> > +void xroot_remove(void *root);
> > +bool xroot_wait_for_bringup(void *root);
> > +int xroot_add_vsec_node(void *root, char *dtb);
> > +int xroot_create_group(void *xr, char *dtb);
> > +int xroot_add_simple_node(void *root, char *dtb, const char *endpoint);
> > +void xroot_broadcast(void *root, enum xrt_events evt);
> > +
> > +#endif     /* _XRT_ROOT_H_ */
> > diff --git a/drivers/fpga/xrt/mgmt/root.c b/drivers/fpga/xrt/mgmt/root.c
> > new file mode 100644
> > index 000000000000..583a37c9d30c
> > --- /dev/null
> > +++ b/drivers/fpga/xrt/mgmt/root.c
> > @@ -0,0 +1,342 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Xilinx Alveo Management Function Driver
> > + *
> > + * Copyright (C) 2020-2021 Xilinx, Inc.
> > + *
> > + * Authors:
> > + * Cheng Zhen <m...@xilinx.com>
> > + */
> > +
> > +#include <linux/module.h>
> > +#include <linux/pci.h>
> > +#include <linux/aer.h>
> > +#include <linux/vmalloc.h>
> > +#include <linux/delay.h>
> > +
> > +#include "xroot.h"
> > +#include "main-impl.h"
> > +#include "metadata.h"
> > +
> > +#define XMGMT_MODULE_NAME  "xmgmt"
> The xrt modules would be more easily identified with a 'xrt' prefix instead 
> of 'x'
> > +#define XMGMT_DRIVER_VERSION       "4.0.0"
> > +
> > +#define XMGMT_PDEV(xm)             ((xm)->pdev)
> > +#define XMGMT_DEV(xm)              (&(XMGMT_PDEV(xm)->dev))
> > +#define xmgmt_err(xm, fmt, args...)        \
> > +   dev_err(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
> > +#define xmgmt_warn(xm, fmt, args...)       \
> > +   dev_warn(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
> > +#define xmgmt_info(xm, fmt, args...)       \
> > +   dev_info(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
> > +#define xmgmt_dbg(xm, fmt, args...)        \
> > +   dev_dbg(XMGMT_DEV(xm), "%s: " fmt, __func__, ##args)
> > +#define XMGMT_DEV_ID(_pcidev)                      \
> > +   ({ typeof(_pcidev) (pcidev) = (_pcidev);        \
> > +   ((pci_domain_nr((pcidev)->bus) << 16) | \
> > +   PCI_DEVID((pcidev)->bus->number, 0)); })
> > +
> > +static struct class *xmgmt_class;
> > +static const struct pci_device_id xmgmt_pci_ids[] = {
> > +   { PCI_DEVICE(0x10EE, 0xd020), }, /* Alveo U50 (golden image) */
> > +   { PCI_DEVICE(0x10EE, 0x5020), }, /* Alveo U50 */
> 
> demagic this table, look at dfl-pci for how to use existing #define for the 
> vendor and create a new on for the device.  If there are vf's add them at the 
> same time.
> 
> What is a golden image ?
> 
> > +   { 0, }
> > +};
> > +
> > +struct xmgmt {
> > +   struct pci_dev *pdev;
> > +   void *root;
> > +
> > +   bool ready;
> > +};
> > +
> > +static int xmgmt_config_pci(struct xmgmt *xm)
> > +{
> > +   struct pci_dev *pdev = XMGMT_PDEV(xm);
> > +   int rc;
> > +
> > +   rc = pcim_enable_device(pdev);
> > +   if (rc < 0) {
> > +           xmgmt_err(xm, "failed to enable device: %d", rc);
> > +           return rc;
> > +   }
> > +
> > +   rc = pci_enable_pcie_error_reporting(pdev);
> > +   if (rc)
> > +           xmgmt_warn(xm, "failed to enable AER: %d", rc);
> > +
> > +   pci_set_master(pdev);
> > +
> > +   rc = pcie_get_readrq(pdev);
> Review this call, it does not go negative
> > +   if (rc < 0) {
> > +           xmgmt_err(xm, "failed to read mrrs %d", rc);
> > +           return rc;
> > +   }
> this is a quirk, add a comment.
> > +   if (rc > 512) {
> > +           rc = pcie_set_readrq(pdev, 512);
> > +           if (rc) {
> > +                   xmgmt_err(xm, "failed to force mrrs %d", rc);
> similar calls do not fail here.
> > +                   return rc;
> > +           }
> > +   }
> > +
> > +   return 0;
> > +}
> > +
> > +static int xmgmt_match_slot_and_save(struct device *dev, void *data)
> > +{
> > +   struct xmgmt *xm = data;
> > +   struct pci_dev *pdev = to_pci_dev(dev);
> > +
> > +   if (XMGMT_DEV_ID(pdev) == XMGMT_DEV_ID(xm->pdev)) {
> > +           pci_cfg_access_lock(pdev);
> > +           pci_save_state(pdev);
> > +   }
> > +
> > +   return 0;
> > +}
> > +
> > +static void xmgmt_pci_save_config_all(struct xmgmt *xm)
> > +{
> > +   bus_for_each_dev(&pci_bus_type, NULL, xm, xmgmt_match_slot_and_save);
> 
> This is a bus call, not a device call.
> 
> Can this be changed into something like what hot reset does ?
> 
> > +}
> > +
> > +static int xmgmt_match_slot_and_restore(struct device *dev, void *data)
> > +{
> > +   struct xmgmt *xm = data;
> > +   struct pci_dev *pdev = to_pci_dev(dev);
> > +
> > +   if (XMGMT_DEV_ID(pdev) == XMGMT_DEV_ID(xm->pdev)) {
> > +           pci_restore_state(pdev);
> > +           pci_cfg_access_unlock(pdev);
> > +   }
> > +
> > +   return 0;
> > +}
> > +
> > +static void xmgmt_pci_restore_config_all(struct xmgmt *xm)
> > +{
> > +   bus_for_each_dev(&pci_bus_type, NULL, xm, xmgmt_match_slot_and_restore);
> > +}
> > +
> > +static void xmgmt_root_hot_reset(struct pci_dev *pdev)
> > +{
> > +   struct xmgmt *xm = pci_get_drvdata(pdev);
> > +   struct pci_bus *bus;
> > +   u8 pci_bctl;
> > +   u16 pci_cmd, devctl;
> > +   int i, ret;
> > +
> > +   xmgmt_info(xm, "hot reset start");
> > +
> > +   xmgmt_pci_save_config_all(xm);
> > +
> > +   pci_disable_device(pdev);
> > +
> > +   bus = pdev->bus;
> > +
> > +   /*
> > +    * When flipping the SBR bit, device can fall off the bus. This is
> > +    * usually no problem at all so long as drivers are working properly
> > +    * after SBR. However, some systems complain bitterly when the device
> > +    * falls off the bus.
> > +    * The quick solution is to temporarily disable the SERR reporting of
> > +    * switch port during SBR.
> > +    */
> > +
> > +   pci_read_config_word(bus->self, PCI_COMMAND, &pci_cmd);
> > +   pci_write_config_word(bus->self, PCI_COMMAND,
> > +                         (pci_cmd & ~PCI_COMMAND_SERR));
> > +   pcie_capability_read_word(bus->self, PCI_EXP_DEVCTL, &devctl);
> > +   pcie_capability_write_word(bus->self, PCI_EXP_DEVCTL,
> > +                              (devctl & ~PCI_EXP_DEVCTL_FERE));
> > +   pci_read_config_byte(bus->self, PCI_BRIDGE_CONTROL, &pci_bctl);
> > +   pci_bctl |= PCI_BRIDGE_CTL_BUS_RESET;
> > +   pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);
> 
> how the pci config values are set and cleared should be consistent.
> 
> this call should be
> 
> pci_write_config_byte (... pci_bctl | PCI_BRIDGE_CTL_BUF_RESET )
> 
> and the next &= avoided
> 
> > +
> > +   msleep(100);
> > +   pci_bctl &= ~PCI_BRIDGE_CTL_BUS_RESET;
> > +   pci_write_config_byte(bus->self, PCI_BRIDGE_CONTROL, pci_bctl);
> > +   ssleep(1);
> > +
> > +   pcie_capability_write_word(bus->self, PCI_EXP_DEVCTL, devctl);
> > +   pci_write_config_word(bus->self, PCI_COMMAND, pci_cmd);
> > +
> > +   ret = pci_enable_device(pdev);
> > +   if (ret)
> > +           xmgmt_err(xm, "failed to enable device, ret %d", ret);
> > +
> > +   for (i = 0; i < 300; i++) {
> > +           pci_read_config_word(pdev, PCI_COMMAND, &pci_cmd);
> > +           if (pci_cmd != 0xffff)
> what happens with i == 300 and pci_cmd is still 0xffff ?
> > +                   break;
> > +           msleep(20);
> > +   }
> > +
> > +   xmgmt_info(xm, "waiting for %d ms", i * 20);
> > +   xmgmt_pci_restore_config_all(xm);
> > +   xmgmt_config_pci(xm);
> > +}
> > +
> > +static int xmgmt_create_root_metadata(struct xmgmt *xm, char **root_dtb)
> > +{
> > +   char *dtb = NULL;
> > +   int ret;
> > +
> > +   ret = xrt_md_create(XMGMT_DEV(xm), &dtb);
> > +   if (ret) {
> > +           xmgmt_err(xm, "create metadata failed, ret %d", ret);
> > +           goto failed;
> > +   }
> > +
> > +   ret = xroot_add_vsec_node(xm->root, dtb);
> > +   if (ret == -ENOENT) {
> > +           /*
> > +            * We may be dealing with a MFG board.
> > +            * Try vsec-golden which will bring up all hard-coded leaves
> > +            * at hard-coded offsets.
> > +            */
> > +           ret = xroot_add_simple_node(xm->root, dtb, 
> > XRT_MD_NODE_VSEC_GOLDEN);
> 
> Manufacturing diagnostics ?
> 
> Tom
> 
> > +   } else if (ret == 0) {
> > +           ret = xroot_add_simple_node(xm->root, dtb, 
> > XRT_MD_NODE_MGMT_MAIN);
> > +   }
> > +   if (ret)
> > +           goto failed;
> > +
> > +   *root_dtb = dtb;
> > +   return 0;
> > +
> > +failed:
> > +   vfree(dtb);
> > +   return ret;
> > +}
> > +
> > +static ssize_t ready_show(struct device *dev,
> > +                     struct device_attribute *da,
> > +                     char *buf)
> > +{
> > +   struct pci_dev *pdev = to_pci_dev(dev);
> > +   struct xmgmt *xm = pci_get_drvdata(pdev);
> > +
> > +   return sprintf(buf, "%d\n", xm->ready);
> > +}
> > +static DEVICE_ATTR_RO(ready);
> > +
> > +static struct attribute *xmgmt_root_attrs[] = {
> > +   &dev_attr_ready.attr,
> > +   NULL
> > +};
> > +
> > +static struct attribute_group xmgmt_root_attr_group = {
> > +   .attrs = xmgmt_root_attrs,
> > +};
> > +
> > +static struct xroot_pf_cb xmgmt_xroot_pf_cb = {
> > +   .xpc_hot_reset = xmgmt_root_hot_reset,
> > +};
> > +
> > +static int xmgmt_probe(struct pci_dev *pdev, const struct pci_device_id 
> > *id)
> > +{
> > +   int ret;
> > +   struct device *dev = &pdev->dev;
> > +   struct xmgmt *xm = devm_kzalloc(dev, sizeof(*xm), GFP_KERNEL);
> > +   char *dtb = NULL;
> > +
> > +   if (!xm)
> > +           return -ENOMEM;
> > +   xm->pdev = pdev;
> > +   pci_set_drvdata(pdev, xm);
> > +
> > +   ret = xmgmt_config_pci(xm);
> > +   if (ret)
> > +           goto failed;
> > +
> > +   ret = xroot_probe(pdev, &xmgmt_xroot_pf_cb, &xm->root);
> > +   if (ret)
> > +           goto failed;
> > +
> > +   ret = xmgmt_create_root_metadata(xm, &dtb);
> > +   if (ret)
> > +           goto failed_metadata;
> > +
> > +   ret = xroot_create_group(xm->root, dtb);
> > +   vfree(dtb);
> > +   if (ret)
> > +           xmgmt_err(xm, "failed to create root group: %d", ret);
> > +
> > +   if (!xroot_wait_for_bringup(xm->root))
> > +           xmgmt_err(xm, "failed to bringup all groups");
> > +   else
> > +           xm->ready = true;
> > +
> > +   ret = sysfs_create_group(&pdev->dev.kobj, &xmgmt_root_attr_group);
> > +   if (ret) {
> > +           /* Warning instead of failing the probe. */
> > +           xmgmt_warn(xm, "create xmgmt root attrs failed: %d", ret);
> > +   }
> > +
> > +   xroot_broadcast(xm->root, XRT_EVENT_POST_CREATION);
> > +   xmgmt_info(xm, "%s started successfully", XMGMT_MODULE_NAME);
> > +   return 0;
> > +
> > +failed_metadata:
> > +   (void)xroot_remove(xm->root);
> > +failed:
> > +   pci_set_drvdata(pdev, NULL);
> > +   return ret;
> > +}
> > +
> > +static void xmgmt_remove(struct pci_dev *pdev)
> > +{
> > +   struct xmgmt *xm = pci_get_drvdata(pdev);
> > +
> > +   xroot_broadcast(xm->root, XRT_EVENT_PRE_REMOVAL);
> > +   sysfs_remove_group(&pdev->dev.kobj, &xmgmt_root_attr_group);
> > +   (void)xroot_remove(xm->root);
> > +   pci_disable_pcie_error_reporting(xm->pdev);
> > +   xmgmt_info(xm, "%s cleaned up successfully", XMGMT_MODULE_NAME);
> > +}
> > +
> > +static struct pci_driver xmgmt_driver = {
> > +   .name = XMGMT_MODULE_NAME,
> > +   .id_table = xmgmt_pci_ids,
> > +   .probe = xmgmt_probe,
> > +   .remove = xmgmt_remove,
> > +};
> > +
> > +static int __init xmgmt_init(void)
> > +{
> > +   int res = 0;
> > +
> > +   res = xmgmt_main_register_leaf();
> > +   if (res)
> > +           return res;
> > +
> > +   xmgmt_class = class_create(THIS_MODULE, XMGMT_MODULE_NAME);
> > +   if (IS_ERR(xmgmt_class))
> > +           return PTR_ERR(xmgmt_class);
> > +
> > +   res = pci_register_driver(&xmgmt_driver);
> > +   if (res) {
> > +           class_destroy(xmgmt_class);
> > +           return res;
> > +   }
> > +
> > +   return 0;
> > +}
> > +
> > +static __exit void xmgmt_exit(void)
> > +{
> > +   pci_unregister_driver(&xmgmt_driver);
> > +   class_destroy(xmgmt_class);
> > +   xmgmt_main_unregister_leaf();
> > +}
> > +
> > +module_init(xmgmt_init);
> > +module_exit(xmgmt_exit);
> > +
> > +MODULE_DEVICE_TABLE(pci, xmgmt_pci_ids);
> > +MODULE_VERSION(XMGMT_DRIVER_VERSION);
> > +MODULE_AUTHOR("XRT Team <runt...@xilinx.com>");
> > +MODULE_DESCRIPTION("Xilinx Alveo management function driver");
> > +MODULE_LICENSE("GPL v2");
> 
- Moritz

Reply via email to