Thanks for your comments.
See my replies in line with [Ram].


-----Original Message-----
From: Mark Bloch [mailto:ma...@mellanox.com] 
Sent: Monday, September 12, 2016 9:44 PM
To: Ram Amrani <ram.amr...@qlogic.com>; dledf...@redhat.com; David Miller 
<da...@davemloft.net>
Cc: Yuval Mintz <yuval.mi...@qlogic.com>; Ariel Elior <ariel.el...@qlogic.com>; 
Michal Kalderon <michal.kalde...@qlogic.com>; Rajesh Borundia 
<rajesh.borun...@qlogic.com>; linux-r...@vger.kernel.org; netdev 
<netdev@vger.kernel.org>
Subject: Re: [RFC 02/11] Add RoCE driver framework


Hi Ram,

Just a few thoughts 

On 12/09/2016 19:07, Ram Amrani wrote:
> Adds a skeletal implementation of the qed* RoCE driver - basically the 
> ability to communicate with the qede driver and receive notifications 
> from it regarding various init/exit events.
> 
> Signed-off-by: Rajesh Borundia <rajesh.borun...@qlogic.com>
> Signed-off-by: Ram Amrani <ram.amr...@qlogic.com>
> ---
>  drivers/infiniband/Kconfig                   |   2 +
>  drivers/infiniband/hw/Makefile               |   1 +
>  drivers/infiniband/hw/qedr/Kconfig           |   7 +
>  drivers/infiniband/hw/qedr/Makefile          |   3 +
>  drivers/infiniband/hw/qedr/main.c            | 293 +++++++++++++++++++++++++
>  drivers/infiniband/hw/qedr/qedr.h            |  60 ++++++
>  drivers/net/ethernet/qlogic/qede/Makefile    |   1 +
>  drivers/net/ethernet/qlogic/qede/qede.h      |   9 +
>  drivers/net/ethernet/qlogic/qede/qede_main.c |  35 ++-  
> drivers/net/ethernet/qlogic/qede/qede_roce.c | 309 +++++++++++++++++++++++++++
>  include/linux/qed/qed_if.h                   |   3 +-
>  include/linux/qed/qede_roce.h                |  88 ++++++++
>  include/uapi/linux/pci_regs.h                |   3 +
>  13 files changed, 803 insertions(+), 11 deletions(-)  create mode 
> 100644 drivers/infiniband/hw/qedr/Kconfig
>  create mode 100644 drivers/infiniband/hw/qedr/Makefile
>  create mode 100644 drivers/infiniband/hw/qedr/main.c  create mode 
> 100644 drivers/infiniband/hw/qedr/qedr.h  create mode 100644 
> drivers/net/ethernet/qlogic/qede/qede_roce.c
>  create mode 100644 include/linux/qed/qede_roce.h

[SNIP]

> +
> +MODULE_DESCRIPTION("QLogic 40G/100G ROCE Driver"); 
> +MODULE_AUTHOR("QLogic Corporation"); MODULE_LICENSE("Dual BSD/GPL"); 
> +MODULE_VERSION(QEDR_MODULE_VERSION);
> +
> +uint debug;
> +module_param(debug, uint, 0);
> +MODULE_PARM_DESC(debug, "Default debug msglevel");

Why are you adding this as a module parameter? 
[Ram] Yuval commented on this in a previous e-mail


> +static LIST_HEAD(qedr_dev_list);
> +static DEFINE_SPINLOCK(qedr_devlist_lock);
> +

You already have a qedr_dev_list mutex in the qede_roce.c file, why do you need 
this spinlock as well?

 [Ram] qedr_devlist_lock - a static (local) list of qedr devices maintained by 
qedr, protected by spinlock. Not in used in the current patches.
qedr_dev_list_lock (with '_') - a static (local) list of qedr devices 
maintained by qede, protected by mutex.
We'll consider removing the first as it is currently not used and/or rename 
them to be more distinct.



> +void qedr_ib_dispatch_event(struct qedr_dev *dev, u8 port_num,
> +                         enum ib_event_type type)
> +{
> +     struct ib_event ibev;
> +
> +     ibev.device = &dev->ibdev;
> +     ibev.element.port_num = port_num;
> +     ibev.event = type;
> +
> +     ib_dispatch_event(&ibev);
> +}
> +
> +static enum rdma_link_layer qedr_link_layer(struct ib_device *device,
> +                                         u8 port_num)
> +{
> +     return IB_LINK_LAYER_ETHERNET;
> +}
> +
> +static int qedr_register_device(struct qedr_dev *dev) {
> +     strlcpy(dev->ibdev.name, "qedr%d", IB_DEVICE_NAME_MAX);
> +
> +     memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC));
> +     dev->ibdev.owner = THIS_MODULE;
> +
> +     dev->ibdev.get_link_layer = qedr_link_layer;
> +
> +     return 0;
> +}
> +
> +/* QEDR sysfs interface */
> +static ssize_t show_rev(struct device *device, struct device_attribute *attr,
> +                     char *buf)
> +{
> +     struct qedr_dev *dev = dev_get_drvdata(device);
> +
> +     return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor); }
> +
> +static ssize_t show_fw_ver(struct device *device, struct device_attribute 
> *attr,
> +                        char *buf)
> +{
> +     return scnprintf(buf, PAGE_SIZE, "%s\n", "FW_VER_TO_ADD"); }

Ira Weiny has added a generic way to expose firmware versions in the rdma 
stack, can you have please have a look at 
c73428230d98d1352bcc69cd8306c292a85e1e42 and see how he converted the mlx5_ib 
module to use it.
[Ram] This way is replaced to be the same as you describe in patch 0004. I'll 
if I can move it to this patch to avoid confusion.

> +static ssize_t show_hca_type(struct device *device,
> +                          struct device_attribute *attr, char *buf) {
> +     return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET"); }
> +
> +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static 
> +DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static 
> +DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
> +
> +static struct device_attribute *qedr_attributes[] = {
> +     &dev_attr_hw_rev,
> +     &dev_attr_fw_ver,
> +     &dev_attr_hca_type
> +};
> +
> +static void qedr_remove_sysfiles(struct qedr_dev *dev) {
> +     int i;
> +
> +     for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++)
> +             device_remove_file(&dev->ibdev.dev, qedr_attributes[i]); }
> +
> +void qedr_config_debug(uint debug, u32 *p_dp_module, u8 *p_dp_level) 
> +{
> +     *p_dp_level = 0;
> +     *p_dp_module = 0;
> +
> +     if (debug & QED_LOG_VERBOSE_MASK) {
> +             *p_dp_level = QED_LEVEL_VERBOSE;
> +             *p_dp_module = (debug & 0x3FFFFFFF);
> +     } else if (debug & QED_LOG_INFO_MASK) {
> +             *p_dp_level = QED_LEVEL_INFO;
> +     } else if (debug & QED_LOG_NOTICE_MASK) {
> +             *p_dp_level = QED_LEVEL_NOTICE;
> +     }
> +}
> +
> +static void qedr_pci_set_atomic(struct qedr_dev *dev, struct pci_dev 
> +*pdev) {
> +     struct pci_dev *bridge;
> +     u32 val;
> +
> +     dev->atomic_cap = IB_ATOMIC_NONE;
> +
> +     bridge = pdev->bus->self;
> +     if (!bridge)
> +             return;
> +
> +     /* Check whether we are connected directly or via a switch */
> +     while (bridge && bridge->bus->parent) {
> +             DP_NOTICE(dev,
> +                       "Device is not connected directly to root. 
> bridge->bus->number=%d primary=%d\n",
> +                       bridge->bus->number, bridge->bus->primary);
> +             /* Need to check Atomic Op Routing Supported all the way to
> +              * root complex.
> +              */
> +             pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val);
> +             if (!(val & PCI_EXP_DEVCAP2_ATOMIC_ROUTE)) {
> +                     pcie_capability_clear_word(pdev,
> +                                                PCI_EXP_DEVCTL2,
> +                                                PCI_EXP_DEVCTL2_ATOMIC_REQ);
> +                     return;
> +             }
> +             bridge = bridge->bus->parent->self;
> +     }
> +     bridge = pdev->bus->self;
> +
> +     /* according to bridge capability */
> +     pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val);
> +     if (val & PCI_EXP_DEVCAP2_ATOMIC_COMP64) {
> +             pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2,
> +                                      PCI_EXP_DEVCTL2_ATOMIC_REQ);
> +             dev->atomic_cap = IB_ATOMIC_GLOB;
> +     } else {
> +             pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL2,
> +                                        PCI_EXP_DEVCTL2_ATOMIC_REQ);
> +     }
> +}
> +
> +static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
> +                              struct net_device *ndev)
> +{
> +     struct qedr_dev *dev;
> +     int rc = 0, i;
> +
> +     dev = (struct qedr_dev *)ib_alloc_device(sizeof(*dev));
> +     if (!dev) {
> +             pr_err("Unable to allocate ib device\n");
> +             return NULL;
> +     }
> +
> +     qedr_config_debug(debug, &dev->dp_module, &dev->dp_level);
> +     DP_VERBOSE(dev, QEDR_MSG_INIT, "qedr add device called\n");
> +
> +     dev->pdev = pdev;
> +     dev->ndev = ndev;
> +     dev->cdev = cdev;
> +
> +     qedr_pci_set_atomic(dev, pdev);
> +
> +     rc = qedr_register_device(dev);
> +     if (rc) {
> +             DP_ERR(dev, "Unable to allocate register device\n");
> +             goto init_err;
> +     }
> +
> +     for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++)
> +             if (device_create_file(&dev->ibdev.dev, qedr_attributes[i]))
> +                     goto init_err;
> +
> +     spin_lock(&qedr_devlist_lock);
> +     list_add_tail_rcu(&dev->entry, &qedr_dev_list);
> +     spin_unlock(&qedr_devlist_lock);
> +
> +     DP_VERBOSE(dev, QEDR_MSG_INIT, "qedr driver loaded successfully\n");
> +     return dev;
> +
> +init_err:
> +     ib_dealloc_device(&dev->ibdev);
> +     DP_ERR(dev, "qedr driver load failed rc=%d\n", rc);
> +
> +     return NULL;
> +}
> +
> +static void qedr_remove(struct qedr_dev *dev) {
> +     /* First unregister with stack to stop all the active traffic
> +      * of the registered clients.
> +      */
> +     qedr_remove_sysfiles(dev);
> +
> +     spin_lock(&qedr_devlist_lock);
> +     list_del_rcu(&dev->entry);
> +     spin_unlock(&qedr_devlist_lock);
> +
> +     ib_dealloc_device(&dev->ibdev);
> +}
> +
> +static int qedr_close(struct qedr_dev *dev) {
> +     qedr_ib_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
> +

Why are you sending port number hard-coded as 1?

[Ram] Our implementation always uses one port. Here's the ibv_devinfo output 
for example:
hca_id: qedr0
        transport:                      InfiniBand (0)
        fw_ver:                         8.10.10.0
        ...
        phys_port_cnt:                  1
                port:   1
                        state:                  PORT_ACTIVE (4)
                        ...
                        link_layer:             Ethernet

hca_id: qedr1
        transport:                      InfiniBand (0)
        fw_ver:                         8.10.10.0
        ...
        phys_port_cnt:                  1
                port:   1
                        state:                  PORT_ACTIVE (4)
                        ...
                        link_layer:             Ethernet

Mark

Reply via email to