>From d2e52a8b9de02521b01de3414562f45e476cafbf Mon Sep 17 00:00:00 2001
From: Ira K. Weiny <[EMAIL PROTECTED]>
Date: Wed, 30 Jul 2008 17:28:30 -0700
Subject: [PATCH] Add a Node Description check on light sweep to ensure that the 
ND has been
 found for each node.  This case covers the condition where a ND message is
 dropped/lost for some reason and OpenSM is left with a valid configured node
 which is not named correctly.

This is not the same as a node which has changed its Node Description.  In
that case the node needs to send a trap.

Signed-off-by: Ira K. Weiny <[EMAIL PROTECTED]>
---
 opensm/opensm/osm_state_mgr.c |   54 +++++++++++++++++++++++++++++++++++++++++
 1 files changed, 54 insertions(+), 0 deletions(-)

diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c
index b599582..15124c9 100644
--- a/opensm/opensm/osm_state_mgr.c
+++ b/opensm/opensm/osm_state_mgr.c
@@ -506,6 +506,54 @@ Exit:
 }
 
 /**********************************************************************
+ During a light sweep, check each node to see if its node description is
+ valid; if not, issue an ND query.
+**********************************************************************/
+static void __osm_state_mgr_get_node_desc(IN cl_map_item_t * const p_object,
+                                       IN void *context)
+{
+       /* cl_qmap_apply_func() callback: p_object is cast to the osm_node_t
+        * being checked, context to the osm_sm_t used for logging/requests. */
+       osm_node_t *const p_node = (osm_node_t *) p_object;
+       osm_sm_t *sm = (osm_sm_t *)context;
+       osm_madw_context_t mad_context;
+       osm_physp_t *p_physp;
+       ib_api_status_t status;
+
+       OSM_LOG_ENTER(sm->p_log);
+       CL_ASSERT(p_node);
+
+       /* A description that is set and differs from the "<unknown>"
+        * placeholder is treated as valid: nothing to do for this node. */
+       if (p_node->print_desc && strcmp(p_node->print_desc, "<unknown>"))
+               goto exit;
+
+       OSM_LOG(sm->p_log, OSM_LOG_ERROR,
+               "__osm_state_mgr_get_node_desc: "
+               "Unknown node description \"%s\" for node 0x%016" PRIx64
+               ".  Reissuing ND query\n",
+               p_node->print_desc ? p_node->print_desc : "<unknown>",
+               cl_ntoh64(osm_node_get_node_guid (p_node)));
+
+       /* get a physp to request from; its dr_path is handed to osm_req_get */
+       p_physp = osm_node_get_any_physp_ptr(p_node);
+       mad_context.nd_context.node_guid = osm_node_get_node_guid(p_node);
+       status = osm_req_get(sm,
+                            osm_physp_get_dr_path_ptr(p_physp),
+                            IB_MAD_ATTR_NODE_DESC,
+                            0, CL_DISP_MSGID_NONE, &mad_context);
+       if (status != IB_SUCCESS)
+               OSM_LOG(sm->p_log, OSM_LOG_ERROR,
+                       "__osm_state_mgr_get_node_desc: ERR 0D03: "
+                       "Failure initiating NodeDescription request (%s)\n",
+                       ib_get_err_str(status));
+
+exit:
+       OSM_LOG_EXIT(sm->p_log);
+}
+
+
+/**********************************************************************
  Initiates a lightweight sweep of the subnet.
  Used during normal sweeps after the subnet is up.
 **********************************************************************/
@@ -514,6 +562,7 @@ static ib_api_status_t __osm_state_mgr_light_sweep_start(IN 
osm_sm_t * sm)
        ib_api_status_t status = IB_SUCCESS;
        osm_bind_handle_t h_bind;
        cl_qmap_t *p_sw_tbl;
+       cl_qmap_t *p_node_tbl;
        cl_map_item_t *p_next;
        osm_node_t *p_node;
        osm_physp_t *p_physp;
@@ -522,6 +571,7 @@ static ib_api_status_t __osm_state_mgr_light_sweep_start(IN 
osm_sm_t * sm)
        OSM_LOG_ENTER(sm->p_log);
 
        p_sw_tbl = &sm->p_subn->sw_guid_tbl;
+       p_node_tbl = &sm->p_subn->node_guid_tbl;
 
        /*
         * First, get the bind handle.
@@ -540,6 +590,10 @@ static ib_api_status_t 
__osm_state_mgr_light_sweep_start(IN osm_sm_t * sm)
        cl_qmap_apply_func(p_sw_tbl, __osm_state_mgr_get_sw_info, sm);
        CL_PLOCK_RELEASE(sm->p_lock);
 
+       CL_PLOCK_ACQUIRE(sm->p_lock);
+       cl_qmap_apply_func(p_node_tbl, __osm_state_mgr_get_node_desc, sm);
+       CL_PLOCK_RELEASE(sm->p_lock);
+
        /* now scan the list of physical ports that were not down but have no 
remote port */
        CL_PLOCK_ACQUIRE(sm->p_lock);
        p_next = cl_qmap_head(&sm->p_subn->node_guid_tbl);
-- 
1.5.4.5

_______________________________________________
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to