I did not see this on the reflector. We did have some mailer problems. So I am resending to the list
One more thing to add: The only other event we considered was PORT_ACTIVE. But as it turns out the event is only generated when the port moves into ACTIVE state which means an SM already handled it... EZ Yevgeny Kliteynik wrote: > Hi Hal > > This patch implements first item of the OSM todo list. > > OpenSM opens a thread that is listening for events on the SM's port. > The events that are being taken care of are IBV_EVENT_DEVICE_FATAL and > IBV_EVENT_PORT_ERROR. > > In case of IBV_EVENT_DEVICE_FATAL, osm is forced to exit. > in case of IBV_EVENT_PORT_ERROR, osm initiates heavy sweep. > > Yevgeny > > Signed-off-by: Yevgeny Kliteynik <[EMAIL PROTECTED]> > > Index: include/opensm/osm_sm_mad_ctrl.h > =================================================================== > --- include/opensm/osm_sm_mad_ctrl.h (revision 8998) > +++ include/opensm/osm_sm_mad_ctrl.h (working copy) > @@ -109,6 +109,7 @@ typedef struct _osm_sm_mad_ctrl > osm_mad_pool_t *p_mad_pool; > osm_vl15_t *p_vl15; > osm_vendor_t *p_vendor; > + struct _osm_state_mgr *p_state_mgr; > osm_bind_handle_t h_bind; > cl_plock_t *p_lock; > cl_dispatcher_t *p_disp; > @@ -130,6 +131,9 @@ typedef struct _osm_sm_mad_ctrl > * p_vendor > * Pointer to the vendor specific interfaces object. > * > +* p_state_mgr > +* Pointer to the state manager object. > +* > * h_bind > * Bind handle returned by the transport layer. > * > @@ -233,6 +237,7 @@ osm_sm_mad_ctrl_init( > IN osm_mad_pool_t* const p_mad_pool, > IN osm_vl15_t* const p_vl15, > IN osm_vendor_t* const p_vendor, > + IN struct _osm_state_mgr* const p_state_mgr, > IN osm_log_t* const p_log, > IN osm_stats_t* const p_stats, > IN cl_plock_t* const p_lock, > @@ -251,6 +256,9 @@ osm_sm_mad_ctrl_init( > * p_vendor > * [in] Pointer to the vendor specific interfaces object. > * > +* p_state_mgr > +* [in] Pointer to the state manager object. > +* > * p_log > * [in] Pointer to the log object. > * > Index: include/vendor/osm_vendor_ibumad.h > =================================================================== > --- include/vendor/osm_vendor_ibumad.h (revision 8998) > +++ include/vendor/osm_vendor_ibumad.h (working copy) > @@ -74,6 +74,8 @@ BEGIN_C_DECLS > #define OSM_UMAD_MAX_CAS 32 > #define OSM_UMAD_MAX_PORTS_PER_CA 2 > > +#define OSM_VENDOR_SUPPORT_EVENTS > + > /* OpenIB gen2 doesn't support RMPP yet */ > > /****s* OpenSM: Vendor UMAD/osm_ca_info_t > @@ -179,6 +181,10 @@ typedef struct _osm_vendor > int umad_port_id; > void *receiver; > int issmfd; > + cl_thread_t events_thread; > + void * events_callback; > + void * sm_context; > + struct ibv_context * ibv_context; > } osm_vendor_t; > > #define OSM_BIND_INVALID_HANDLE 0 > Index: include/vendor/osm_vendor_api.h > =================================================================== > --- include/vendor/osm_vendor_api.h (revision 8998) > +++ include/vendor/osm_vendor_api.h (working copy) > @@ -526,6 +526,110 @@ osm_vendor_set_debug( > * SEE ALSO > *********/ > > +#ifdef OSM_VENDOR_SUPPORT_EVENTS > + > +#define OSM_EVENT_FATAL 1 > +#define OSM_EVENT_PORT_ERR 2 > + > +/****s* OpenSM Vendor API/osm_vend_events_callback_t > +* NAME > +* osm_vend_events_callback_t > +* > +* DESCRIPTION > +* Function prototype for the vendor events callback. > +* The vendor layer calls this function on driver events. > +* > +* SYNOPSIS > +*/ > +typedef void > +(*osm_vend_events_callback_t)( > + IN int events_mask, > + IN void * const context ); > +/* > +* PARAMETERS > +* events_mask > +* [in] The received event(s). > +* > +* context > +* [in] Context supplied as the "sm_context" argument in > +* the osm_vendor_unreg_events_cb call > +* > +* RETURN VALUES > +* None. > +* > +* NOTES > +* > +* SEE ALSO > +* osm_vendor_reg_events_cb osm_vendor_unreg_events_cb > +*********/ > + > +/****f* OpenSM Vendor API/osm_vendor_reg_events_cb > +* NAME > +* osm_vendor_reg_events_cb > +* > +* DESCRIPTION > +* Registers the events callback function and start the events > +* thread > +* > +* SYNOPSIS > +*/ > +int > +osm_vendor_reg_events_cb( > + IN osm_vendor_t * const p_vend, > + IN void * const sm_callback, > + IN void * const sm_context); > +/* > +* PARAMETERS > +* p_vend > +* [in] vendor handle. > +* > +* sm_callback > +* [in] Callback function that should be called when > +* the event is received. > +* > +* sm_context > +* [in] Context supplied as the "context" argument in > +* the subsequenct calls to the sm_callback function > +* > +* RETURN VALUE > +* IB_SUCCESS if OK. > +* > +* NOTES > +* > +* SEE ALSO > +* osm_vend_events_callback_t osm_vendor_unreg_events_cb > +*********/ > + > +/****f* OpenSM Vendor API/osm_vendor_unreg_events_cb > +* NAME > +* osm_vendor_unreg_events_cb > +* > +* DESCRIPTION > +* Un-Registers the events callback function and stops the events > +* thread > +* > +* SYNOPSIS > +*/ > +void > +osm_vendor_unreg_events_cb( > + IN osm_vendor_t * const p_vend); > +/* > +* PARAMETERS > +* p_vend > +* [in] vendor handle. > +* > +* > +* RETURN VALUE > +* None. > +* > +* NOTES > +* > +* SEE ALSO > +* osm_vend_events_callback_t osm_vendor_reg_events_cb > +*********/ > + > +#endif /* OSM_VENDOR_SUPPORT_EVENTS */ > + > END_C_DECLS > > #endif /* _OSM_VENDOR_API_H_ */ > Index: libvendor/osm_vendor_ibumad.c > =================================================================== > --- libvendor/osm_vendor_ibumad.c (revision 8998) > +++ libvendor/osm_vendor_ibumad.c (working copy) > @@ -72,6 +72,7 @@ > #include <opensm/osm_log.h> > #include <opensm/osm_mad_pool.h> > #include <vendor/osm_vendor_api.h> > +#include <infiniband/verbs.h> > > /****s* OpenSM: Vendor AL/osm_umad_bind_info_t > * NAME > @@ -441,6 +442,91 @@ Exit: > > /********************************************************************** > **********************************************************************/ > +static void > +umad_events_thread( > + IN void * vend_context) > +{ > + int res = 0; > + osm_vendor_t * p_vend = (osm_vendor_t *) vend_context; > + struct ibv_async_event event; > + > + OSM_LOG_ENTER( p_vend->p_log, umad_events_thread ); > + > + osm_log(p_vend->p_log, OSM_LOG_DEBUG, > + "umad_events_thread: Device %s, async event FD: %d\n", > + p_vend->umad_port.ca_name, p_vend->ibv_context->async_fd); > + osm_log(p_vend->p_log, OSM_LOG_DEBUG, > + "umad_events_thread: Listening for events on device %s, port > %d\n", > + p_vend->umad_port.ca_name, p_vend->umad_port.portnum); > + > + while (1) { > + > + res = ibv_get_async_event(p_vend->ibv_context, &event); > + if (res) > + { > + osm_log(p_vend->p_log, OSM_LOG_ERROR, > + "umad_events_thread: ERR 5450: " > + "Failed getting async event (device %s, port %d)\n", > + p_vend->umad_port.ca_name, p_vend->umad_port.portnum); > + goto Exit; > + } > + > + if (!p_vend->events_callback) > + { > + osm_log(p_vend->p_log, OSM_LOG_DEBUG, > + "umad_events_thread: Events callback has been > unregistered\n"); > + ibv_ack_async_event(&event); > + goto Exit; > + } > + /* > + * We're listening to events on the SM's port only > + */ > + if ( event.element.port_num == p_vend->umad_port.portnum ) > + { > + switch (event.event_type) > + { > + case IBV_EVENT_DEVICE_FATAL: > + osm_log(p_vend->p_log, OSM_LOG_INFO, > + "umad_events_thread: Received > IBV_EVENT_DEVICE_FATAL\n"); > + ((osm_vend_events_callback_t) > + (p_vend->events_callback))(OSM_EVENT_FATAL, > p_vend->sm_context); > + > + ibv_ack_async_event(&event); > + goto Exit; > + break; > + > + case IBV_EVENT_PORT_ERR: > + osm_log(p_vend->p_log, OSM_LOG_VERBOSE, > + "umad_events_thread: Received IBV_EVENT_PORT_ERR\n"); > + ((osm_vend_events_callback_t) > + (p_vend->events_callback))(OSM_EVENT_PORT_ERR, > p_vend->sm_context); > + break; > + > + default: > + osm_log(p_vend->p_log, OSM_LOG_DEBUG, > + "umad_events_thread: Received event #%d on port %d - > Ignoring\n", > + event.event_type, event.element.port_num); > + } > + } > + else > + { > + osm_log(p_vend->p_log, OSM_LOG_DEBUG, > + "umad_events_thread: Received event #%d on port %d - > Ignoring\n", > + event.event_type, event.element.port_num); > + } > + > + ibv_ack_async_event(&event); > + } > + > + Exit: > + osm_log(p_vend->p_log, OSM_LOG_DEBUG, > + "umad_events_thread: Terminating thread\n"); > + OSM_LOG_EXIT(p_vend->p_log); > + return; > +} > + > +/********************************************************************** > + **********************************************************************/ > ib_api_status_t > osm_vendor_init( > IN osm_vendor_t* const p_vend, > @@ -456,6 +542,7 @@ osm_vendor_init( > p_vend->max_retries = OSM_DEFAULT_RETRY_COUNT; > cl_spinlock_construct( &p_vend->cb_lock ); > cl_spinlock_construct( &p_vend->match_tbl_lock ); > + cl_thread_construct( &p_vend->events_thread ); > p_vend->umad_port_id = -1; > p_vend->issmfd = -1; > > @@ -1217,4 +1304,114 @@ osm_vendor_set_debug( > umad_debug(level); > } > > +/********************************************************************** > + **********************************************************************/ > +int > +osm_vendor_reg_events_cb( > + IN osm_vendor_t * const p_vend, > + IN void * const sm_callback, > + IN void * const sm_context) > +{ > + ib_api_status_t status = IB_SUCCESS; > + struct ibv_device ** dev_list; > + struct ibv_device * device; > + > + OSM_LOG_ENTER( p_vend->p_log, osm_vendor_reg_events_cb ); > + > + p_vend->events_callback = sm_callback; > + p_vend->sm_context = sm_context; > + > + dev_list = ibv_get_device_list(NULL); > + if (!dev_list || !(*dev_list)) { > + osm_log(p_vend->p_log, OSM_LOG_ERROR, > + "osm_vendor_reg_events_cb: ERR 5440: " > + "No IB devices found\n"); > + status = IB_ERROR; > + goto Exit; > + } > + > + if (!p_vend->umad_port.ca_name || !p_vend->umad_port.ca_name[0]) > + { > + osm_log(p_vend->p_log, OSM_LOG_ERROR, > + "osm_vendor_reg_events_cb: ERR 5441: " > + "Vendor initialization is not completed yet\n"); > + status = IB_ERROR; > + goto Exit; > + } > + > + osm_log(p_vend->p_log, OSM_LOG_DEBUG, > + "osm_vendor_reg_events_cb: Registering on device %s\n", > + p_vend->umad_port.ca_name); > + > + /* > + * find device whos name matches the SM's device > + */ > + for ( device = *dev_list; > + (device != NULL) && > + (strcmp(p_vend->umad_port.ca_name, ibv_get_device_name(device)) != > 0); > + device += sizeof(struct ibv_device *) ) > + ; > + if (!device) > + { > + osm_log(p_vend->p_log, OSM_LOG_ERROR, > + "osm_vendor_reg_events_cb: ERR 5442: " > + "Device %s hasn't been found in the device list\n" > + ,p_vend->umad_port.ca_name); > + status = IB_ERROR; > + goto Exit; > + } > + > + p_vend->ibv_context = ibv_open_device(device); > + if (!p_vend->ibv_context) { > + osm_log(p_vend->p_log, OSM_LOG_ERROR, > + "osm_vendor_reg_events_cb: ERR 5443: " > + "Couldn't get context for %s\n", > + p_vend->umad_port.ca_name); > + status = IB_ERROR; > + goto Exit; > + } > + > + /* > + * Initiate the events thread > + */ > + if (cl_thread_init(&p_vend->events_thread, > + umad_events_thread, > + p_vend, > + "ibumad events thread") != CL_SUCCESS) { > + osm_log(p_vend->p_log, OSM_LOG_ERROR, > + "osm_vendor_reg_events_cb: ERR 5444: " > + "Failed initiating event listening thread\n"); > + status = IB_ERROR; > + goto Exit; > + } > + > + Exit: > + if (status != IB_SUCCESS) > + { > + p_vend->events_callback = NULL; > + p_vend->sm_context = NULL; > + p_vend->ibv_context = NULL; > + p_vend->events_callback = NULL; > + } > + OSM_LOG_EXIT( p_vend->p_log ); > + return status; > +} > + > +/********************************************************************** > + **********************************************************************/ > +void > +osm_vendor_unreg_events_cb( > + IN osm_vendor_t * const p_vend) > +{ > + OSM_LOG_ENTER( p_vend->p_log, osm_vendor_unreg_events_cb ); > + p_vend->events_callback = NULL; > + p_vend->sm_context = NULL; > + p_vend->ibv_context = NULL; > + p_vend->events_callback = NULL; > + OSM_LOG_EXIT( p_vend->p_log ); > +} > + > +/********************************************************************** > + **********************************************************************/ > + > #endif /* OSM_VENDOR_INTF_OPENIB */ > Index: libvendor/libosmvendor.map > =================================================================== > --- libvendor/libosmvendor.map (revision 8998) > +++ libvendor/libosmvendor.map (working copy) > @@ -1,4 +1,4 @@ > -OSMVENDOR_2.0 { > +OSMVENDOR_2.1 { > global: > umad_receiver; > osm_vendor_init; > @@ -23,5 +23,7 @@ OSMVENDOR_2.0 { > osmv_bind_sa; > osmv_query_sa; > osm_vendor_get_guid_ca_and_port; > + osm_vendor_reg_events_cb; > + osm_vendor_unreg_events_cb; > local: *; > }; > Index: opensm/osm_sm.c > =================================================================== > --- opensm/osm_sm.c (revision 8998) > +++ opensm/osm_sm.c (working copy) > @@ -313,6 +313,7 @@ osm_sm_init( > p_sm->p_mad_pool, > p_sm->p_vl15, > p_sm->p_vendor, > + &p_sm->state_mgr, > p_log, p_stats, p_lock, p_disp ); > if( status != IB_SUCCESS ) > goto Exit; > Index: opensm/osm_sm_mad_ctrl.c > =================================================================== > --- opensm/osm_sm_mad_ctrl.c (revision 8998) > +++ opensm/osm_sm_mad_ctrl.c (working copy) > @@ -59,6 +59,7 @@ > #include <opensm/osm_msgdef.h> > #include <opensm/osm_helper.h> > #include <opensm/osm_opensm.h> > +#include <opensm/osm_state_mgr.h> > > /****f* opensm: SM/__osm_sm_mad_ctrl_retire_trans_mad > * NAME > @@ -953,6 +954,7 @@ osm_sm_mad_ctrl_init( > IN osm_mad_pool_t* const p_mad_pool, > IN osm_vl15_t* const p_vl15, > IN osm_vendor_t* const p_vendor, > + IN struct _osm_state_mgr* const p_state_mgr, > IN osm_log_t* const p_log, > IN osm_stats_t* const p_stats, > IN cl_plock_t* const p_lock, > @@ -969,6 +971,7 @@ osm_sm_mad_ctrl_init( > p_ctrl->p_disp = p_disp; > p_ctrl->p_mad_pool = p_mad_pool; > p_ctrl->p_vendor = p_vendor; > + p_ctrl->p_state_mgr = p_state_mgr; > p_ctrl->p_stats = p_stats; > p_ctrl->p_lock = p_lock; > p_ctrl->p_vl15 = p_vl15; > @@ -995,6 +998,47 @@ osm_sm_mad_ctrl_init( > > /********************************************************************** > **********************************************************************/ > +void > +__osm_vend_events_callback( > + IN int events_mask, > + IN void * const context ) > +{ > + osm_sm_mad_ctrl_t * const p_ctrl = (osm_sm_mad_ctrl_t * const) context; > + > + OSM_LOG_ENTER(p_ctrl->p_log, __osm_vend_events_callback); > + > + if (events_mask & OSM_EVENT_FATAL) > + { > + osm_log(p_ctrl->p_log, OSM_LOG_INFO, > + "__osm_vend_events_callback: " > + "Events callback got OSM_EVENT_FATAL\n"); > + osm_log(p_ctrl->p_log, OSM_LOG_SYS, > + "Fatal HCA error - forcing OpenSM exit\n"); > + osm_exit_flag = 1; > + OSM_LOG_EXIT(p_ctrl->p_log); > + return; > + } > + > + if (events_mask & OSM_EVENT_PORT_ERR) > + { > + osm_log(p_ctrl->p_log, OSM_LOG_INFO, > + "__osm_vend_events_callback: " > + "Events callback got OSM_EVENT_PORT_ERR - forcing heavy > sweep\n"); > + p_ctrl->p_subn->force_immediate_heavy_sweep = TRUE; > + osm_state_mgr_process((osm_state_mgr_t * const)p_ctrl->p_state_mgr, > + OSM_SIGNAL_SWEEP); > + OSM_LOG_EXIT(p_ctrl->p_log); > + return; > + } > + > + osm_log(p_ctrl->p_log, OSM_LOG_INFO, > + "__osm_vend_events_callback: " > + "Events callback got event mask of %d - No action taken\n"); > + OSM_LOG_EXIT(p_ctrl->p_log); > +} > + > +/********************************************************************** > + **********************************************************************/ > ib_api_status_t > osm_sm_mad_ctrl_bind( > IN osm_sm_mad_ctrl_t* const p_ctrl, > @@ -1044,6 +1088,17 @@ osm_sm_mad_ctrl_bind( > goto Exit; > } > > + if ( osm_vendor_reg_events_cb(p_ctrl->p_vendor, > + __osm_vend_events_callback, > + p_ctrl) ) > + { > + status = IB_ERROR; > + osm_log( p_ctrl->p_log, OSM_LOG_ERROR, > + "osm_sm_mad_ctrl_bind: ERR 3120: " > + "Vendor failed to register for events\n" ); > + goto Exit; > + } > + > Exit: > OSM_LOG_EXIT( p_ctrl->p_log ); > return( status ); > Index: config/osmvsel.m4 > =================================================================== > --- config/osmvsel.m4 (revision 8998) > +++ config/osmvsel.m4 (working copy) > @@ -63,9 +63,9 @@ if test $with_osmv = "openib"; then > OSMV_CFLAGS="-DOSM_VENDOR_INTF_OPENIB" > OSMV_INCLUDES="-I\$(srcdir)/../include > -I\$(srcdir)/../../libibcommon/include/infiniband > -I\$(srcdir)/../../libibumad/include/infiniband" > if test "x$with_umad_libs" = "x"; then > - OSMV_LDADD="-libumad" > + OSMV_LDADD="-libumad -libverbs" > else > - OSMV_LDADD="-L$with_umad_libs -libumad" > + OSMV_LDADD="-L$with_umad_libs -libumad -libverbs" > fi > > if test "x$with_umad_includes" != "x"; then > @@ -137,6 +137,8 @@ if test "$disable_libcheck" != "yes"; th > LDFLAGS="$LDFLAGS $OSMV_LDADD" > AC_CHECK_LIB(ibumad, umad_init, [], > AC_MSG_ERROR([umad_init() not found. libosmvendor of type openib > requires libibumad.])) > + AC_CHECK_LIB(ibverbs, ibv_get_device_list, [], > + AC_MSG_ERROR([umad_init() not found. libosmvendor of type openib > requires libibverbs.])) > LD_FLAGS=$osmv_save_ldflags > elif test $with_osmv = "sim" ; then > LDFLAGS="$LDFLAGS -L$with_sim/lib" > _______________________________________________ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general