[PATCH] Add trap details to notice log message

2013-05-28 Thread Line Holen
Signed-off-by: Line Holen line.ho...@oracle.com

---

diff --git a/include/iba/ib_types.h b/include/iba/ib_types.h
index 032adc3..d95c96e 100644
--- a/include/iba/ib_types.h
+++ b/include/iba/ib_types.h
@@ -3,6 +3,7 @@
  * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved.
  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
  * Copyright (c) 2009 HNR Consulting. All rights reserved.
+ * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -7619,6 +7620,23 @@ ib_member_set_join_state(IN OUT ib_member_rec_t * 
p_mc_rec,
 #define IB_NOTICE_TYPE_INFO0x04
 #define IB_NOTICE_TYPE_EMPTY   0x7F
 
+#define SM_GID_IN_SERVICE_TRAP 64
+#define SM_GID_OUT_OF_SERVICE_TRAP 65
+#define SM_MGID_CREATED_TRAP   66
+#define SM_MGID_DESTROYED_TRAP 67
+#define SM_UNPATH_TRAP 68
+#define SM_REPATH_TRAP 69
+#define SM_LINK_STATE_CHANGED_TRAP 128
+#define SM_LINK_INTEGRITY_THRESHOLD_TRAP   129
+#define SM_BUFFER_OVERRUN_THRESHOLD_TRAP   130
+#define SM_WATCHDOG_TIMER_EXPIRED_TRAP 131
+#define SM_CAP_MASK_CHANGED_TRAP   144
+#define SM_SYS_IMG_GUID_CHANGED_TRAP   145
+#define SM_BAD_MKEY_TRAP   256
+#define SM_BAD_PKEY_TRAP   257
+#define SM_BAD_QKEY_TRAP   258
+#define SM_BAD_SWITCH_PKEY_TRAP259
+
 #include complib/cl_packon.h
 typedef struct _ib_mad_notice_attr // Total Size calc  Accumulated
 {
diff --git a/opensm/osm_inform.c b/opensm/osm_inform.c
index f5abbc6..3dcd48e 100644
--- a/opensm/osm_inform.c
+++ b/opensm/osm_inform.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved.
  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
  * Copyright (c) 2009 HNR Consulting. All rights reserved.
+ * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -316,7 +317,8 @@ static ib_api_status_t send_report(IN osm_infr_t * 
p_infr_rec,  /* the informinfo
 
if (!p_report_madw) {
OSM_LOG(p_log, OSM_LOG_ERROR, ERR 0203: 
-   osm_mad_pool_get failed\n);
+   Cannot send report to LID %u, osm_mad_pool_get 
failed\n,
+   cl_ntoh16(p_infr_rec-report_addr.dest_lid));
status = IB_ERROR;
goto Exit;
}
@@ -594,14 +596,141 @@ static void log_notice(osm_log_t * log, osm_log_level_t 
level,
gid = ntc-data_details.ntc_64_67.gid;
else
gid = ntc-issuer_gid;
-   OSM_LOG(log, level,
-   Reporting Generic Notice type:%u num:%u (%s)
-from LID:%u GID:%s\n,
-   ib_notice_get_type(ntc),
-   cl_ntoh16(ntc-g_or_v.generic.trap_num),
-   ib_get_trap_str(ntc-g_or_v.generic.trap_num),
-   cl_ntoh16(ntc-issuer_lid),
-   inet_ntop(AF_INET6, gid-raw, gid_str, sizeof gid_str));
+
+   switch (cl_ntoh16(ntc-g_or_v.generic.trap_num)) {
+   case SM_GID_IN_SERVICE_TRAP:
+   case SM_GID_OUT_OF_SERVICE_TRAP:
+   OSM_LOG(log, level,
+   Reporting Informational Notice \%s\, 
GID:%s\n,
+   ib_get_trap_str(ntc-g_or_v.generic.trap_num),
+   inet_ntop(AF_INET6, gid-raw, gid_str, sizeof 
gid_str));
+   break;
+   case SM_MGID_CREATED_TRAP:
+   case SM_MGID_DESTROYED_TRAP:
+   OSM_LOG(log, level,
+   Reporting Informational Notice \%s\, 
MGID:%s\n,
+   ib_get_trap_str(ntc-g_or_v.generic.trap_num),
+   inet_ntop(AF_INET6, gid-raw, gid_str, sizeof 
gid_str));
+   break;
+   case SM_UNPATH_TRAP:
+   case SM_REPATH_TRAP:
+   /* TODO: Fill in details once SM starts to use these 
traps */
+   OSM_LOG(log, level,
+   Reporting Informational Notice \%s\n,
+   ib_get_trap_str(ntc-g_or_v.generic.trap_num));
+   break;
+   case SM_LINK_STATE_CHANGED_TRAP:
+   

[PATCH] RDMA/ocrdma: removed use_cnt for queues.

2013-05-28 Thread bgottumukkala
From: Naresh Gottumukkala bgottumukk...@emulex.com

Removed use_cnt. Rely on OFED stack to keep track of the use count.

Signed-off-by: Naresh Gottumukkala bgottumukk...@emulex.com
---
 drivers/infiniband/hw/ocrdma/ocrdma.h   |  4 ---
 drivers/infiniband/hw/ocrdma/ocrdma_hw.c|  1 -
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 39 +
 3 files changed, 1 insertion(+), 43 deletions(-)

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h 
b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 48970af..21d99f6 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -97,7 +97,6 @@ struct ocrdma_queue_info {
u16 id; /* qid, where to ring the doorbell. */
u16 head, tail;
bool created;
-   atomic_t used;  /* Number of valid elements in the queue */
 };
 
 struct ocrdma_eq {
@@ -198,7 +197,6 @@ struct ocrdma_cq {
struct ocrdma_ucontext *ucontext;
dma_addr_t pa;
u32 len;
-   atomic_t use_cnt;
 
/* head of all qp's sq and rq for which cqes need to be flushed
 * by the software.
@@ -210,7 +208,6 @@ struct ocrdma_pd {
struct ib_pd ibpd;
struct ocrdma_dev *dev;
struct ocrdma_ucontext *uctx;
-   atomic_t use_cnt;
u32 id;
int num_dpp_qp;
u32 dpp_page;
@@ -246,7 +243,6 @@ struct ocrdma_srq {
 
struct ocrdma_qp_hwq_info rq;
struct ocrdma_pd *pd;
-   atomic_t use_cnt;
u32 id;
u64 *rqe_wr_id_tbl;
u32 *idx_bit_fields;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c 
b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 71942af..910b706 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -128,7 +128,6 @@ static inline struct ocrdma_mqe *ocrdma_get_mqe(struct 
ocrdma_dev *dev)
 static inline void ocrdma_mq_inc_head(struct ocrdma_dev *dev)
 {
dev-mq.sq.head = (dev-mq.sq.head + 1)  (OCRDMA_MQ_LEN - 1);
-   atomic_inc(dev-mq.sq.used);
 }
 
 static inline void *ocrdma_get_mqe_rsp(struct ocrdma_dev *dev)
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c 
b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index b29a424..38c145b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -398,7 +398,6 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
kfree(pd);
return ERR_PTR(status);
}
-   atomic_set(pd-use_cnt, 0);
 
if (udata  context) {
status = ocrdma_copy_pd_uresp(pd, context, udata);
@@ -419,12 +418,6 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd)
int status;
u64 usr_db;
 
-   if (atomic_read(pd-use_cnt)) {
-   ocrdma_err(%s(%d) pd=0x%x is in use.\n,
-  __func__, dev-id, pd-id);
-   status = -EFAULT;
-   goto dealloc_err;
-   }
status = ocrdma_mbx_dealloc_pd(dev, pd);
if (pd-uctx) {
u64 dpp_db = dev-nic_info.dpp_unmapped_addr +
@@ -436,7 +429,6 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd)
ocrdma_del_mmap(pd-uctx, usr_db, dev-nic_info.db_page_size);
}
kfree(pd);
-dealloc_err:
return status;
 }
 
@@ -474,7 +466,6 @@ static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd 
*ibpd,
return ERR_PTR(-ENOMEM);
}
mr-pd = pd;
-   atomic_inc(pd-use_cnt);
mr-ibmr.lkey = mr-hwmr.lkey;
if (mr-hwmr.remote_wr || mr-hwmr.remote_rd)
mr-ibmr.rkey = mr-hwmr.lkey;
@@ -664,7 +655,6 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 
start, u64 len,
if (status)
goto mbx_err;
mr-pd = pd;
-   atomic_inc(pd-use_cnt);
mr-ibmr.lkey = mr-hwmr.lkey;
if (mr-hwmr.remote_wr || mr-hwmr.remote_rd)
mr-ibmr.rkey = mr-hwmr.lkey;
@@ -689,7 +679,6 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
if (mr-hwmr.fr_mr == 0)
ocrdma_free_mr_pbl_tbl(dev, mr-hwmr);
 
-   atomic_dec(mr-pd-use_cnt);
/* it could be user registered memory. */
if (mr-umem)
ib_umem_release(mr-umem);
@@ -752,7 +741,6 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int 
entries, int vector,
 
spin_lock_init(cq-cq_lock);
spin_lock_init(cq-comp_handler_lock);
-   atomic_set(cq-use_cnt, 0);
INIT_LIST_HEAD(cq-sq_head);
INIT_LIST_HEAD(cq-rq_head);
cq-dev = dev;
@@ -799,9 +787,6 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq)
struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
struct ocrdma_dev *dev = cq-dev;
 
-   if (atomic_read(cq-use_cnt))
-   return -EINVAL;
-
status = ocrdma_mbx_destroy_cq(dev, cq);
 
if (cq-ucontext) {
@@ -1023,15 +1008,6 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp 
*qp,
qp-state = 

[PATCH] Segfault in osm_mgrp_delete_port()

2013-05-28 Thread Line Holen
Segfaults can occur in osm_mgrp_delete_port() if the last
full member of a MCG is removed while other non-full members
still exist.

Signed-off-by: Line Holen line.ho...@oracle.com

---

diff --git a/include/opensm/osm_multicast.h b/include/opensm/osm_multicast.h
index 11d789b..e192a72 100644
--- a/include/opensm/osm_multicast.h
+++ b/include/opensm/osm_multicast.h
@@ -2,6 +2,7 @@
  * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2002-2012 Mellanox Technologies LTD. All rights reserved.
  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -447,7 +448,7 @@ void osm_mgrp_delete_port(IN osm_subn_t * subn, IN 
osm_log_t * log,
 * SEE ALSO
 */
 
-void osm_mgrp_remove_port(osm_subn_t * subn, osm_log_t * log, osm_mgrp_t * 
mgrp,
+boolean_t osm_mgrp_remove_port(osm_subn_t * subn, osm_log_t * log, osm_mgrp_t 
* mgrp,
  osm_mcm_alias_guid_t * mcm_alias_guid,
  ib_member_rec_t * mcmr);
 void osm_mgrp_cleanup(osm_subn_t * subn, osm_mgrp_t * mpgr);
diff --git a/opensm/osm_multicast.c b/opensm/osm_multicast.c
index c43d58d..eb93c55 100644
--- a/opensm/osm_multicast.c
+++ b/opensm/osm_multicast.c
@@ -2,6 +2,7 @@
  * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2002-2012 Mellanox Technologies LTD. All rights reserved.
  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -338,12 +339,13 @@ osm_mcm_port_t *osm_mgrp_add_port(IN osm_subn_t * subn, 
osm_log_t * log,
return mcm_port;
 }
 
-void osm_mgrp_remove_port(osm_subn_t * subn, osm_log_t * log, osm_mgrp_t * 
mgrp,
+boolean_t osm_mgrp_remove_port(osm_subn_t * subn, osm_log_t * log, osm_mgrp_t 
* mgrp,
  osm_mcm_alias_guid_t * mcm_alias_guid,
  ib_member_rec_t *mcmr)
 {
uint8_t join_state = mcmr-scope_state  0xf;
uint8_t port_join_state, new_join_state;
+   boolean_t mgrp_deleted = FALSE;
 
/*
 * according to the same o15-0.1.14 we get the stored
@@ -406,9 +408,12 @@ void osm_mgrp_remove_port(osm_subn_t * subn, osm_log_t * 
log, osm_mgrp_t * mgrp,
--mgrp-full_members == 0) {
mgrp_send_notice(subn, log, mgrp, 67);
osm_mgrp_cleanup(subn, mgrp);
+   mgrp_deleted = TRUE;
}
 
subn-p_osm-sa.dirty = TRUE;
+
+   return (mgrp_deleted);
 }
 
 void osm_mgrp_delete_port(osm_subn_t * subn, osm_log_t * log, osm_mgrp_t * 
mgrp,
@@ -416,14 +421,16 @@ void osm_mgrp_delete_port(osm_subn_t * subn, osm_log_t * 
log, osm_mgrp_t * mgrp,
 {
osm_mcm_alias_guid_t *mcm_alias_guid, *next_mcm_alias_guid;
ib_member_rec_t mcmrec;
+   boolean_t mgrp_deleted = FALSE;
 
next_mcm_alias_guid = (osm_mcm_alias_guid_t *) 
cl_qmap_head(mgrp-mcm_alias_port_tbl);
-   while (next_mcm_alias_guid != (osm_mcm_alias_guid_t *) 
cl_qmap_end(mgrp-mcm_alias_port_tbl)) {
+   while (next_mcm_alias_guid != (osm_mcm_alias_guid_t *) 
cl_qmap_end(mgrp-mcm_alias_port_tbl) 
+ !mgrp_deleted) {
mcm_alias_guid = next_mcm_alias_guid;
next_mcm_alias_guid = (osm_mcm_alias_guid_t *) 
cl_qmap_next(next_mcm_alias_guid-map_item);
if (mcm_alias_guid-p_base_mcm_port-port == port) {
mcmrec.scope_state = 0xf;
-   osm_mgrp_remove_port(subn, log, mgrp, mcm_alias_guid,
+   mgrp_deleted = osm_mgrp_remove_port(subn, log, mgrp, 
mcm_alias_guid,
 mcmrec);
}
}
diff --git a/opensm/osm_sa_mcmember_record.c b/opensm/osm_sa_mcmember_record.c
index 242fcde..4d4070f 100644
--- a/opensm/osm_sa_mcmember_record.c
+++ b/opensm/osm_sa_mcmember_record.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved.
  * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
  * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
+ * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -979,7 +980,7 @@ static void mcmr_rcv_leave_mgrp(IN osm_sa_t * sa, IN 
osm_madw_t * p_madw)
}
 
/* remove port and/or update join state */
-   osm_mgrp_remove_port(sa-p_subn, sa-p_log, p_mgrp, p_mcm_alias_guid,
+   (void) osm_mgrp_remove_port(sa-p_subn, sa-p_log, p_mgrp, 

[PATCH] infiniband_diags/src/saquery.c: Add SMInfoRecord support

2013-05-28 Thread Husam Kahalah

This patch is the combination of 2 patches
infiniband_diags/src/saquery.c: Add SMInfoRecord support
infiniband_diags/doc/rst/saquery.8.in.rst: Add support to the manual

Signed-off-by: Husam Kahalah hkaha...@asaltech.com

---
 doc/rst/saquery.8.in.rst |1 +
 src/saquery.c|   45 
+

 2 files changed, 46 insertions(+)

diff --git a/doc/rst/saquery.8.in.rst b/doc/rst/saquery.8.in.rst
index 8d87096..0551917 100644
--- a/doc/rst/saquery.8.in.rst
+++ b/doc/rst/saquery.8.in.rst
@@ -156,6 +156,7 @@ Supported query names (and aliases):
 MFTRecord (MFTR) [[mlid]/[position]/[block]]
 GUIDInfoRecord (GIR) [[lid]/[block]]
 SwitchInfoRecord (SWIR) [lid]
+SMInfoRecord (SMIR) [lid]



diff --git a/src/saquery.c b/src/saquery.c
index a5f9171..d60ac19 100644
--- a/src/saquery.c
+++ b/src/saquery.c
@@ -487,6 +487,30 @@ static void dump_service_record(void *data)
cl_ntoh64(p_sr-service_data64[1]));
 }

+static void dump_sm_info_record(void *data)
+{
+ib_sminfo_record_t *p_smr = data;
+const ib_sm_info_t *const p_smi = p_smr-sm_info;
+uint8_t priority, state;
+priority = ib_sminfo_get_priority(p_smi);
+state = ib_sminfo_get_state(p_smi);
+
+printf(SMInfoRecord dump:\n
+   \t\tRID\n
+   \t\tLID...%u\n
+   \t\tSMInfo dump:\n
+   \t\tGUID..0x%016 PRIx64 \n
+   \t\tSM_Key0x%016 PRIx64 \n
+   \t\tActCount..0x%X\n
+   \t\tPriority..%u\n
+   \t\tSMState...%u\n,
+   cl_ntoh16(p_smr-lid),
+   cl_ntoh64(p_smr-sm_info.guid),
+   cl_ntoh64(p_smr-sm_info.sm_key),
+   cl_ntoh32(p_smr-sm_info.act_count),
+   priority, state);
+}
+
 static void dump_switch_info_record(void *data)
 {
 ib_switch_info_record_t *p_sir = data;
@@ -1192,6 +1216,25 @@ static int query_service_records(const struct 
query_cmd *q, struct sa_handle * h

 dump_service_record);
 }

+static int query_sm_info_records(const struct query_cmd *q,
+ struct sa_handle * h, struct query_params *p,
+ int argc, char *argv[])
+{
+ib_sminfo_record_t smir;
+ib_net64_t comp_mask = 0;
+int lid = 0;
+
+if (argc  0)
+parse_lid_and_ports(h, argv[0], lid, NULL, NULL);
+
+memset(smir, 0, sizeof(smir));
+CHECK_AND_SET_VAL(lid, 16, 0, smir.lid, SMIR, LID);
+
+return get_and_dump_any_records(h, IB_SA_ATTR_SMINFORECORD, 0,
+comp_mask, smir, sizeof(smir),
+dump_sm_info_record);
+}
+
 static int query_switchinfo_records(const struct query_cmd *q,
 struct sa_handle * h, struct query_params *p,
 int argc, char *argv[])
@@ -1404,6 +1447,8 @@ static const struct query_cmd query_cmds[] = {
  [[lid]/[block]], query_guidinfo_records},
 {SwitchInfoRecord, SWIR, IB_SA_ATTR_SWITCHINFORECORD,
  [lid], query_switchinfo_records},
+{SMInfoRecord, SMIR, IB_SA_ATTR_SMINFORECORD,
+[lid], query_sm_info_records},
 {0}
 };

--
1.7.9.6



--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC][PATCH] mm: Fix RLIMIT_MEMLOCK

2013-05-28 Thread Christoph Lameter
On Mon, 27 May 2013, Peter Zijlstra wrote:

 Before your patch pinned was included in locked and thus RLIMIT_MEMLOCK
 had a single resource counter. After your patch RLIMIT_MEMLOCK is
 applied separately to both -- more or less.

Before the patch the count was doubled since a single page was counted
twice: Once because it was mlocked (marked with PG_mlock) and then again
because it was also pinned (the refcount was increased). Two different things.

We have agreed for a long time that mlocked pages are movable. That is not
true for pinned pages, and therefore pinned pages do not fall
into that category (Hugh? AFAICR you came up with that rule?)

 NO, mlocked pages are pages that do not leave core memory; IOW do not
 cause major faults. Pinning pages is a perfectly spec compliant mlock()
 implementation.

That is not the definition that we have used so far.

 Now in an earlier discussion on the issue 'we' (I can't remember if you
 participated there, I remember Mel and Kosaki-San) agreed that for
 'normal' (read not whacky real-time people) mlock can still be useful
 and we should introduce a pinned user API for the RT people.

Right. I remember that.

  Pinned pages are pages that have an elevated refcount because the hardware
  needs to use these pages for I/O. The elevated refcount may be temporary
  (then we don't care about this) or for a longer time (such as the memory
  registration of the IB subsystem). That is when we account the memory as
  pinned. The elevated refcount stops page migration and other things from
  trying to move that memory.

 Again I _know_ that!!!

But then you refuse to acknowledge the difference and want to conflate
both.

  Pages can be both pinned and mlocked.

 Right, but apart from mlockall() this is a highly unlikely situation to
 actually occur. And if you're using mlockall() you've effectively
 disabled RLIMIT_MEMLOCK and thus nobody cares if the resource counter
 goes funny.

mlockall() would never be used on all processes. You still need the
RLIMIT_MEMLOCK to ensure that the box does not lock up.

  I think we need to be first clear on what we want to accomplish and what
  these counters actually should count before changing things.

 Backward isn't it... _you_ changed it without consideration.

I applied the categorization that we had agreed on before during the
development of page migration. Pinning is not compatible.

 The IB code does a big get_user_pages(), which last time I checked
 pins a sequential range of pages. Therefore the VMA approach.

The IB code (and other code) can require the pinning of pages in various
ways.
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] IB/qib: add optional numa affinity

2013-05-28 Thread Mike Marciniszyn
From: Ramkrishna Vepa ramkrishna.v...@intel.com

This patch adds context relative numa affinity conditioned on the
module parameter numa_aware. The qib_ctxtdata has an additional
node_id member and qib_create_ctxtdata() has an additional node_id
parameter.

The allocations within the hdr queue and eager queue setup routines now
take this additional member and adjust allocations as necessary.  PSM
will pass either the current numa node or the node closest to the HCA
depending on numa_aware. Verbs will always use the node closest to
the HCA.

Reviewed-by: Dean Luick dean.lu...@intel.com
Signed-off-by: Ramkrishna Vepa ramkrishna.v...@intel.com
Signed-off-by: Vinit Agnihotri vinit.abhay.agniho...@intel.com
Signed-off-by: Mike Marciniszyn mike.marcinis...@intel.com
---
 drivers/infiniband/hw/qib/qib.h  |7 +
 drivers/infiniband/hw/qib/qib_file_ops.c |6 -
 drivers/infiniband/hw/qib/qib_init.c |   39 +-
 3 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 5643ce4..ca740d8 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -154,6 +154,8 @@ struct qib_ctxtdata {
 */
/* instead of calculating it */
unsigned ctxt;
+   /* local node of context */
+   int node_id;
/* non-zero if ctxt is being shared. */
u16 subctxt_cnt;
/* non-zero if ctxt is being shared. */
@@ -1092,6 +1094,8 @@ struct qib_devdata {
u16 psxmitwait_check_rate;
/* high volume overflow errors defered to tasklet */
struct tasklet_struct error_tasklet;
+
+   int assigned_node_id; /* NUMA node closest to HCA */
 };
 
 /* hol_state values */
@@ -1171,7 +1175,7 @@ int qib_create_rcvhdrq(struct qib_devdata *, struct 
qib_ctxtdata *);
 int qib_setup_eagerbufs(struct qib_ctxtdata *);
 void qib_set_ctxtcnt(struct qib_devdata *);
 int qib_create_ctxts(struct qib_devdata *dd);
-struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32);
+struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int);
 void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
 void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
 
@@ -1462,6 +1466,7 @@ extern unsigned qib_n_krcv_queues;
 extern unsigned qib_sdma_fetch_arb;
 extern unsigned qib_compat_ddr_negotiate;
 extern int qib_special_trigger;
+extern unsigned qib_numa_aware;
 
 extern struct mutex qib_mutex;
 
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c 
b/drivers/infiniband/hw/qib/qib_file_ops.c
index 4f7aa30..5003440 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1263,8 +1263,12 @@ static int setup_ctxt(struct qib_pportdata *ppd, int 
ctxt,
struct qib_ctxtdata *rcd;
void *ptmp = NULL;
int ret;
+   int numa_id;
 
-   rcd = qib_create_ctxtdata(ppd, ctxt);
+   numa_id = qib_numa_aware ? numa_node_id() :
+   dd-assigned_node_id;
+
+   rcd = qib_create_ctxtdata(ppd, ctxt, numa_id);
 
/*
 * Allocate memory for use in qib_tid_update() at open to
diff --git a/drivers/infiniband/hw/qib/qib_init.c 
b/drivers/infiniband/hw/qib/qib_init.c
index 6176dfa..8f46fb4 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -67,6 +67,11 @@ ushort qib_cfgctxts;
 module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO);
 MODULE_PARM_DESC(cfgctxts, Set max number of contexts to use);
 
+unsigned qib_numa_aware;
+module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO);
+MODULE_PARM_DESC(numa_aware,
+   0 - PSM allocation close to HCA, 1 - PSM allocation local to 
process);
+
 /*
  * If set, do not write to any regs if avoidable, hack to allow
  * check for deranged default register values.
@@ -124,6 +129,11 @@ int qib_create_ctxts(struct qib_devdata *dd)
 {
unsigned i;
int ret;
+   int local_node_id = pcibus_to_node(dd-pcidev-bus);
+
+   if (local_node_id  0)
+   local_node_id = numa_node_id();
+   dd-assigned_node_id = local_node_id;
 
/*
 * Allocate full ctxtcnt array, rather than just cfgctxts, because
@@ -146,7 +156,8 @@ int qib_create_ctxts(struct qib_devdata *dd)
continue;
 
ppd = dd-pport + (i % dd-num_pports);
-   rcd = qib_create_ctxtdata(ppd, i);
+
+   rcd = qib_create_ctxtdata(ppd, i, dd-assigned_node_id);
if (!rcd) {
qib_dev_err(dd,
Unable to allocate ctxtdata for Kernel ctxt, 
failing\n);
@@ -164,14 +175,16 @@ done:
 /*
  * Common code for user and kernel context setup.
  */
-struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
+struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
+   int node_id)
 

Re: Status of ummunot branch?

2013-05-28 Thread Roland Dreier
On Tue, May 28, 2013 at 10:51 AM, Jeff Squyres (jsquyres)
jsquy...@cisco.com wrote:
 I see a ummunot branch on your kernel tree at git.kernel.org 
 (https://git.kernel.org/cgit/linux/kernel/git/roland/infiniband.git/log/?h=ummunot).

 Just curious -- what's the status of this tree?  I ask because, as an MPI 
 guy, I would *love* to see this stuff integrated into the kernel and 
 libibverbs.

Haven't touched it in quite a while except to keep it building.  Needs
work to finish up.
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Status of ummunot branch?

2013-05-28 Thread Jeff Squyres (jsquyres)
On May 28, 2013, at 1:52 PM, Roland Dreier rol...@purestorage.com wrote:

 Haven't touched it in quite a while except to keep it building.  Needs
 work to finish up.

What kinds of things still need to be done?  (I don't know if we could work on 
this or not; just asking to scope out what would need to be done at this point)

Has anything been done on the userspace side?

-- 
Jeff Squyres
jsquy...@cisco.com
For corporate legal information go to: 
http://www.cisco.com/web/about/doing_business/legal/cri/

--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Add trap details to notice log message

2013-05-28 Thread Hal Rosenstock
Hi Line,

Just some nits below:

On 5/28/2013 4:46 AM, Line Holen wrote:
 Signed-off-by: Line Holen line.ho...@oracle.com
 
 ---
 
 diff --git a/include/iba/ib_types.h b/include/iba/ib_types.h
 index 032adc3..d95c96e 100644
 --- a/include/iba/ib_types.h
 +++ b/include/iba/ib_types.h
 @@ -3,6 +3,7 @@
   * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved.
   * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
   * Copyright (c) 2009 HNR Consulting. All rights reserved.
 + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
 @@ -7619,6 +7620,23 @@ ib_member_set_join_state(IN OUT ib_member_rec_t * 
 p_mc_rec,
  #define IB_NOTICE_TYPE_INFO  0x04
  #define IB_NOTICE_TYPE_EMPTY 0x7F
  
 +#define SM_GID_IN_SERVICE_TRAP   64
 +#define SM_GID_OUT_OF_SERVICE_TRAP   65
 +#define SM_MGID_CREATED_TRAP 66
 +#define SM_MGID_DESTROYED_TRAP   67
 +#define SM_UNPATH_TRAP   68
 +#define SM_REPATH_TRAP   69
 +#define SM_LINK_STATE_CHANGED_TRAP   128
 +#define SM_LINK_INTEGRITY_THRESHOLD_TRAP 129
 +#define SM_BUFFER_OVERRUN_THRESHOLD_TRAP 130
 +#define SM_WATCHDOG_TIMER_EXPIRED_TRAP   131
 +#define SM_CAP_MASK_CHANGED_TRAP 144

Would name be better as SM_LOCAL_CHANGES_TRAP ?

 +#define SM_SYS_IMG_GUID_CHANGED_TRAP 145
 +#define SM_BAD_MKEY_TRAP 256
 +#define SM_BAD_PKEY_TRAP 257
 +#define SM_BAD_QKEY_TRAP 258
 +#define SM_BAD_SWITCH_PKEY_TRAP  259
 +
  #include complib/cl_packon.h
  typedef struct _ib_mad_notice_attr   // Total Size calc  Accumulated
  {
 diff --git a/opensm/osm_inform.c b/opensm/osm_inform.c
 index f5abbc6..3dcd48e 100644
 --- a/opensm/osm_inform.c
 +++ b/opensm/osm_inform.c
 @@ -3,6 +3,7 @@
   * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved.
   * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
   * Copyright (c) 2009 HNR Consulting. All rights reserved.
 + * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
 @@ -316,7 +317,8 @@ static ib_api_status_t send_report(IN osm_infr_t * 
 p_infr_rec,/* the informinfo
  
   if (!p_report_madw) {
   OSM_LOG(p_log, OSM_LOG_ERROR, ERR 0203: 
 - osm_mad_pool_get failed\n);
 + Cannot send report to LID %u, osm_mad_pool_get 
 failed\n,
 + cl_ntoh16(p_infr_rec-report_addr.dest_lid));
   status = IB_ERROR;
   goto Exit;
   }
 @@ -594,14 +596,141 @@ static void log_notice(osm_log_t * log, 
 osm_log_level_t level,
   gid = ntc-data_details.ntc_64_67.gid;
   else
   gid = ntc-issuer_gid;
 - OSM_LOG(log, level,
 - Reporting Generic Notice type:%u num:%u (%s)
 -  from LID:%u GID:%s\n,
 - ib_notice_get_type(ntc),
 - cl_ntoh16(ntc-g_or_v.generic.trap_num),
 - ib_get_trap_str(ntc-g_or_v.generic.trap_num),
 - cl_ntoh16(ntc-issuer_lid),
 - inet_ntop(AF_INET6, gid-raw, gid_str, sizeof gid_str));
 +
 + switch (cl_ntoh16(ntc-g_or_v.generic.trap_num)) {
 + case SM_GID_IN_SERVICE_TRAP:
 + case SM_GID_OUT_OF_SERVICE_TRAP:
 + OSM_LOG(log, level,
 + Reporting Informational Notice \%s\, 
 GID:%s\n,
 + ib_get_trap_str(ntc-g_or_v.generic.trap_num),
 + inet_ntop(AF_INET6, gid-raw, gid_str, sizeof 
 gid_str));
 + break;
 + case SM_MGID_CREATED_TRAP:
 + case SM_MGID_DESTROYED_TRAP:
 + OSM_LOG(log, level,
 + Reporting Informational Notice \%s\, 
 MGID:%s\n,
 + ib_get_trap_str(ntc-g_or_v.generic.trap_num),
 + inet_ntop(AF_INET6, gid-raw, gid_str, sizeof 
 gid_str));
 + break;
 + case SM_UNPATH_TRAP:
 + case SM_REPATH_TRAP:
 + /* TODO: Fill in details once SM starts to use these 
 traps */
 + OSM_LOG(log, level,
 + Reporting Informational Notice \%s\n,
 + 

[ANNOUNCE] libibverbs 1.1.7 is released

2013-05-28 Thread Roland Dreier
libibverbs is a library that allows programs to use RDMA verbs for
direct access to RDMA (currently InfiniBand and iWARP) hardware from
userspace.

The new stable release, 1.1.7, is available from

http://www.openfabrics.org/downloads/verbs/libibverbs-1.1.7.tar.gz

with sha1sum

3def9411b1e443cd3c8dfde761c7c6308cb98e6f  libibverbs-1.1.7.tar.gz

I also pushed the latest tree and tag out to kernel.org:

git://git.kernel.org/pub/scm/libs/infiniband/libibverbs.git

(the name of the tag is libibverbs-1.1.7).

The main new feature in this release is initial support for raw QPs.
This release is intended as a checkpoint to get pending changes out
before starting work on general extension support (with XRC support as
an initial driver).

The git shortlog since libibverbs 1.1.6 is:

Bart Van Assche (1):
  Fix a compiler warnings with NVALGRIND

Dotan Barak (2):
  Add helpers to deal with new InfiniBand link speeds
  Fix resource leaks in the pingpong examples

Doug Ledford (5):
  devinfo: Give an error when the user specifies an invalid port
  devinfo: Don't allow port == 0 as an option
  Fix the compatibility wrapper on PPC
  ud_pingpong: Don't try to send UD messages larger than MTU
  Don't print link phys state on iWARP

Jeff Squyres (2):
  libibverbs: Use autoreconf in autogen.sh
  .gitignore updates; rename configure.in->.ac

Or Gerlitz (1):
  Add raw packet QP type

Roland Dreier (1):
  Roll libibverbs 1.1.7 release

Yann Droneaud (5):
  memory: use SCNxPTR format to read uintptr_t values
  memory: comment out unused verify() function
  Open files with close on exec flag
  configure: Apply updates proposed by autoupdate
  configure: Use automake's option subdir-objects
--
To unsubscribe from this list: send the line unsubscribe linux-rdma in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html