Hi Eli, are there known IBoE fixes which are in OFED but missed 2.6.37-rc1?
Also, can the patch below and/or any other enhancements you've placed in OFED be
pushed upstream? It would be great to have perf counters working properly for IBoE.

Or.

From 72c316b60f62401e031520fe3f55ec6879bbc42b Mon Sep 17 00:00:00 2001
From: Eli Cohen <e...@mellanox.co.il>
Date: Wed, 6 Jan 2010 14:09:38 +0200
Subject: [PATCH 12/12] mlx4: add support for reading performance counters

This patch uses the basic or extended counters, which can be read through a
command interface, to report counters for all the QPs that work on an rdmaoe
port. This effectively allows implementing performance counters a la IB.

Signed-off-by: Eli Cohen <e...@mellanox.co.il>
---
 drivers/infiniband/hw/mlx4/mad.c     |   86 ++++++++++++++++++++++++++++++++-
 drivers/infiniband/hw/mlx4/main.c    |   17 ++++++-
 drivers/infiniband/hw/mlx4/mlx4_ib.h |    1 +
 drivers/infiniband/hw/mlx4/qp.c      |    2 +
 drivers/net/mlx4/fw.h                |    1 -
 drivers/net/mlx4/main.c              |   22 +++++++--
 include/linux/mlx4/cmd.h             |    4 ++
 include/linux/mlx4/device.h          |   36 ++++++++++++++
 8 files changed, 159 insertions(+), 10 deletions(-)

Index: ofed_kernel-fixes/drivers/infiniband/hw/mlx4/mad.c
===================================================================
--- ofed_kernel-fixes.orig/drivers/infiniband/hw/mlx4/mad.c     2010-09-01 15:30:01.000000000 +0300
+++ ofed_kernel-fixes/drivers/infiniband/hw/mlx4/mad.c  2010-09-01 15:33:48.571462204 +0300
@@ -229,9 +229,9 @@ static void forward_trap(struct mlx4_ib_
        }
 }

-int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags,        u8 port_num,
-                       struct ib_wc *in_wc, struct ib_grh *in_grh,
-                       struct ib_mad *in_mad, struct ib_mad *out_mad)
+static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+                          struct ib_wc *in_wc, struct ib_grh *in_grh,
+                          struct ib_mad *in_mad, struct ib_mad *out_mad)
 {
        u16 slid, prev_lid = 0;
        int err;
@@ -299,6 +299,87 @@ int mlx4_ib_process_mad(struct ib_device
        return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
 }

+static __be32 be64_to_be32(__be64 b64)
+{
+       return cpu_to_be32(be64_to_cpu(b64) & 0xffffffff);
+}
+
+static void edit_counters(struct mlx4_counters *cnt, void *data)
+{
+       *(__be32 *)(data + 40 + 24) = be64_to_be32(cnt->tx_bytes);
+       *(__be32 *)(data + 40 + 28) = be64_to_be32(cnt->rx_bytes);
+       *(__be32 *)(data + 40 + 32) = be64_to_be32(cnt->tx_frames);
+       *(__be32 *)(data + 40 + 36) = be64_to_be32(cnt->rx_frames);
+}
+
+static void edit_ext_counters(struct mlx4_counters_ext *cnt, void *data)
+{
+       *(__be32 *)(data + 40 + 24) = be64_to_be32(cnt->tx_uni_bytes);
+       *(__be32 *)(data + 40 + 28) = be64_to_be32(cnt->rx_uni_bytes);
+       *(__be32 *)(data + 40 + 32) = be64_to_be32(cnt->tx_uni_frames);
+       *(__be32 *)(data + 40 + 36) = be64_to_be32(cnt->rx_uni_frames);
+       *(__be32 *)(data + 40 + 8) = be64_to_be32(cnt->rx_err_frames);
+}
+
+static int rdmaoe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+                              struct ib_wc *in_wc, struct ib_grh *in_grh,
+                              struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+       struct mlx4_cmd_mailbox *mailbox;
+       struct mlx4_ib_dev *dev = to_mdev(ibdev);
+       int err;
+       u32 inmod = dev->counters[port_num - 1] & 0xffff;
+       int mode;
+
+        if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
+               return -EINVAL;
+
+       mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
+       if (IS_ERR(mailbox))
+               return IB_MAD_RESULT_FAILURE;
+
+       err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0,
+                          MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C);
+       if (err)
+               err = IB_MAD_RESULT_FAILURE;
+       else {
+               memset(out_mad->data, 0, sizeof out_mad->data);
+               mode = be32_to_cpu(((struct mlx4_counters *)mailbox->buf)->counter_mode) & 0xf;
+               switch (mode) {
+               case 0:
+                       edit_counters(mailbox->buf, out_mad->data);
+                       err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+                       break;
+               case 1:
+                       edit_ext_counters(mailbox->buf, out_mad->data);
+                       err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+                       break;
+               default:
+                       err = IB_MAD_RESULT_FAILURE;
+               }
+       }
+
+       mlx4_free_cmd_mailbox(dev->dev, mailbox);
+
+       return err;
+}
+
+int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags,        u8 port_num,
+                       struct ib_wc *in_wc, struct ib_grh *in_grh,
+                       struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+       switch (rdma_port_link_layer(ibdev, port_num)) {
+       case IB_LINK_LAYER_INFINIBAND:
+               return ib_process_mad(ibdev, mad_flags, port_num, in_wc,
+                                     in_grh, in_mad, out_mad);
+       case IB_LINK_LAYER_ETHERNET:
+               return rdmaoe_process_mad(ibdev, mad_flags, port_num, in_wc,
+                                         in_grh, in_mad, out_mad);
+       default:
+               return -EINVAL;
+       }
+}
+
 static void send_handler(struct ib_mad_agent *agent,
                         struct ib_mad_send_wc *mad_send_wc)
 {
Index: ofed_kernel-fixes/drivers/infiniband/hw/mlx4/main.c
===================================================================
--- ofed_kernel-fixes.orig/drivers/infiniband/hw/mlx4/main.c    2010-09-01 15:30:01.000000000 +0300
+++ ofed_kernel-fixes/drivers/infiniband/hw/mlx4/main.c 2010-09-01 15:33:05.427464524 +0300
@@ -1169,6 +1169,7 @@ static void *mlx4_ib_add(struct mlx4_dev
        int i;
        int err;
        struct mlx4_ib_rocee *rocee;
+       int k;

        if (!mlx4_ib_version_printed) {
                printk(KERN_INFO "%s", mlx4_ib_version);
@@ -1309,12 +1310,18 @@ static void *mlx4_ib_add(struct mlx4_dev
        if (init_node_data(ibdev))
                goto err_map;

+       for (k = 0; k < ibdev->num_ports; ++k) {
+               err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[k]);
+               if (err)
+                       ibdev->counters[k] = -1;
+       }
+
        spin_lock_init(&ibdev->sm_lock);
        mutex_init(&ibdev->cap_mask_mutex);
        mutex_init(&ibdev->xrc_reg_mutex);

        if (ib_register_device(&ibdev->ib_dev))
-               goto err_map;
+               goto err_counter;

        if (mlx4_ib_mad_init(ibdev))
                goto err_reg;
@@ -1332,7 +1339,7 @@ static void *mlx4_ib_add(struct mlx4_dev
        }

        if(sysfs_create_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group))
-               goto err_reg;
+               goto err_notif;

        ibdev->ib_active = 1;

@@ -1346,6 +1353,10 @@ err_notif:
 err_reg:
        ib_unregister_device(&ibdev->ib_dev);

+err_counter:
+       for (; k; --k)
+               mlx4_counter_free(ibdev->dev, ibdev->counters[k - 1]);
+
 err_map:
        iounmap(ibdev->uar_map);

@@ -1365,11 +1376,15 @@ static void mlx4_ib_remove(struct mlx4_d
 {
        struct mlx4_ib_dev *ibdev = ibdev_ptr;
        int p;
+       int k;

        sysfs_remove_group(&ibdev->ib_dev.dev.kobj, &diag_counters_group);

        mlx4_ib_mad_cleanup(ibdev);
        ib_unregister_device(&ibdev->ib_dev);
+       for (k = 0; k < ibdev->num_ports; ++k)
+               mlx4_counter_free(ibdev->dev, ibdev->counters[k]);
+
        if (ibdev->rocee.nb.notifier_call) {
                unregister_netdevice_notifier(&ibdev->rocee.nb);
                flush_workqueue(wq);
Index: ofed_kernel-fixes/drivers/infiniband/hw/mlx4/mlx4_ib.h
===================================================================
--- ofed_kernel-fixes.orig/drivers/infiniband/hw/mlx4/mlx4_ib.h 2010-09-01 15:30:01.000000000 +0300
+++ ofed_kernel-fixes/drivers/infiniband/hw/mlx4/mlx4_ib.h      2010-09-01 15:33:05.043465489 +0300
@@ -230,6 +230,7 @@ struct mlx4_ib_dev {
        struct mutex            xrc_reg_mutex;
        int                     ib_active;
        struct mlx4_ib_rocee    rocee;
+       int                     counters[MLX4_MAX_PORTS];
 };

 static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
Index: ofed_kernel-fixes/drivers/infiniband/hw/mlx4/qp.c
===================================================================
--- ofed_kernel-fixes.orig/drivers/infiniband/hw/mlx4/qp.c      2010-09-01 15:30:01.000000000 +0300
+++ ofed_kernel-fixes/drivers/infiniband/hw/mlx4/qp.c   2010-09-01 15:33:05.391464429 +0300
@@ -967,7 +967,6 @@ static int mlx4_set_path(struct mlx4_ib_
                        --path->static_rate;
        } else
                path->static_rate = 0;
-       path->counter_index = 0xff;

        if (ah->ah_flags & IB_AH_GRH) {
                if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
@@ -1101,6 +1100,12 @@ static int __mlx4_ib_modify_qp(struct ib
                }
        }

+       if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR &&
+           dev->counters[qp->port - 1] != -1) {
+               context->pri_path.counter_index = dev->counters[qp->port - 1];
+               optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
+       }
+
        if (attr_mask & IB_QP_PKEY_INDEX) {
                context->pri_path.pkey_index = attr->pkey_index;
                optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
Index: ofed_kernel-fixes/drivers/net/mlx4/fw.h
===================================================================
--- ofed_kernel-fixes.orig/drivers/net/mlx4/fw.h        2010-09-01 15:30:01.000000000 +0300
+++ ofed_kernel-fixes/drivers/net/mlx4/fw.h     2010-09-01 15:33:05.199464059 +0300
@@ -146,7 +146,6 @@ struct mlx4_init_hca_param {
        u8  log_mc_table_sz;
        u8  log_mpt_sz;
        u8  log_uar_sz;
-       u8  counter_mode;
 };

 struct mlx4_init_ib_param {
Index: ofed_kernel-fixes/drivers/net/mlx4/main.c
===================================================================
--- ofed_kernel-fixes.orig/drivers/net/mlx4/main.c      2010-09-01 15:30:01.000000000 +0300
+++ ofed_kernel-fixes/drivers/net/mlx4/main.c   2010-09-01 15:33:05.219464453 +0300
@@ -1009,17 +1009,29 @@ int mlx4_counter_alloc(struct mlx4_dev *
 {
        struct mlx4_priv *priv = mlx4_priv(dev);

-       *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
-       if (*idx == -1)
+       switch (dev->caps.counters_mode) {
+       case MLX4_CUNTERS_BASIC:
+       case MLX4_CUNTERS_EXT:
+               *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
+               if (*idx == -1)
+                       return -ENOMEM;
+               return 0;
+       default:
                return -ENOMEM;
-
-       return 0;
+       }
 }
 EXPORT_SYMBOL_GPL(mlx4_counter_alloc);

 void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
 {
-       mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx);
+       switch (dev->caps.counters_mode) {
+       case MLX4_CUNTERS_BASIC:
+       case MLX4_CUNTERS_EXT:
+               mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx);
+               return;
+       default:
+               return;
+       }
 }
 EXPORT_SYMBOL_GPL(mlx4_counter_free);

Index: ofed_kernel-fixes/include/linux/mlx4/cmd.h
===================================================================
--- ofed_kernel-fixes.orig/include/linux/mlx4/cmd.h     2010-09-01 15:30:01.000000000 +0300
+++ ofed_kernel-fixes/include/linux/mlx4/cmd.h  2010-09-01 15:30:01.000000000 +0300
@@ -122,6 +122,10 @@ enum {
        /* debug commands */
        MLX4_CMD_QUERY_DEBUG_MSG = 0x2a,
        MLX4_CMD_SET_DEBUG_MSG   = 0x2b,
+
+       /* statistics commands */
+       MLX4_CMD_QUERY_IF_STAT   = 0X54,
+       MLX4_CMD_SET_IF_STAT     = 0X55,
 };

 enum {
Index: ofed_kernel-fixes/include/linux/mlx4/device.h
===================================================================
--- ofed_kernel-fixes.orig/include/linux/mlx4/device.h  2010-09-01 15:30:01.000000000 +0300
+++ ofed_kernel-fixes/include/linux/mlx4/device.h       2010-09-01 15:33:05.375463893 +0300
@@ -411,6 +411,42 @@ union mlx4_ext_av {
        struct mlx4_eth_av      eth;
 };

+struct mlx4_counters {
+       __be32  counter_mode;
+       __be32  num_ifc;
+       u32     reserved[2];
+       __be64  rx_frames;
+       __be64  rx_bytes;
+       __be64  tx_frames;
+       __be64  tx_bytes;
+};
+
+struct mlx4_counters_ext {
+       __be32  counter_mode;
+       __be32  num_ifc;
+       u32     reserved[2];
+       __be64  rx_uni_frames;
+       __be64  rx_uni_bytes;
+       __be64  rx_mcast_frames;
+       __be64  rx_mcast_bytes;
+       __be64  rx_bcast_frames;
+       __be64  rx_bcast_bytes;
+       __be64  rx_nobuf_frames;
+       __be64  rx_nobuf_bytes;
+       __be64  rx_err_frames;
+       __be64  rx_err_bytes;
+       __be64  tx_uni_frames;
+       __be64  tx_uni_bytes;
+       __be64  tx_mcast_frames;
+       __be64  tx_mcast_bytes;
+       __be64  tx_bcast_frames;
+       __be64  tx_bcast_bytes;
+       __be64  tx_nobuf_frames;
+       __be64  tx_nobuf_bytes;
+       __be64  tx_err_frames;
+       __be64  tx_err_bytes;
+};
+
 struct mlx4_dev {
        struct pci_dev         *pdev;
        unsigned long           flags;
Index: ofed_kernel-fixes/include/linux/mlx4/qp.h
===================================================================
--- ofed_kernel-fixes.orig/include/linux/mlx4/qp.h      2010-09-01 15:30:01.000000000 +0300
+++ ofed_kernel-fixes/include/linux/mlx4/qp.h   2010-09-01 15:33:05.359464345 +0300
@@ -54,7 +54,8 @@ enum mlx4_qp_optpar {
        MLX4_QP_OPTPAR_RETRY_COUNT              = 1 << 12,
        MLX4_QP_OPTPAR_RNR_RETRY                = 1 << 13,
        MLX4_QP_OPTPAR_ACK_TIMEOUT              = 1 << 14,
-       MLX4_QP_OPTPAR_SCHED_QUEUE              = 1 << 16
+       MLX4_QP_OPTPAR_SCHED_QUEUE              = 1 << 16,
+       MLX4_QP_OPTPAR_COUNTER_INDEX            = 1 << 20
 };

 enum mlx4_qp_state {
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to