Isn’t a socket or a pipe the right way to address this issue?

George.
On Apr 12, 2014, at 11:01 , svn-commit-mai...@open-mpi.org wrote: > Author: rhc (Ralph Castain) > Date: 2014-04-12 11:01:24 EDT (Sat, 12 Apr 2014) > New Revision: 31376 > URL: https://svn.open-mpi.org/trac/ompi/changeset/31376 > > Log: > Ensure we properly terminate the listening thread prior to exiting, but do so > in a way that doesn't make us wait for select to timeout. > > Refs #4510 > > Text files modified: > trunk/orte/mca/oob/tcp/oob_tcp_component.c | 9 ++++++--- > > trunk/orte/mca/oob/tcp/oob_tcp_component.h | 2 ++ > > trunk/orte/mca/oob/tcp/oob_tcp_listener.c | 17 ++++++++++++----- > > 3 files changed, 20 insertions(+), 8 deletions(-) > > Modified: trunk/orte/mca/oob/tcp/oob_tcp_component.c > ============================================================================== > --- trunk/orte/mca/oob/tcp/oob_tcp_component.c Sat Apr 12 10:43:20 > 2014 (r31375) > +++ trunk/orte/mca/oob/tcp/oob_tcp_component.c 2014-04-12 11:01:24 EDT > (Sat, 12 Apr 2014) (r31376) > @@ -140,8 +140,8 @@ > if (ORTE_PROC_IS_HNP) { > OBJ_CONSTRUCT(&mca_oob_tcp_component.listen_thread, opal_thread_t); > mca_oob_tcp_component.listen_thread_active = false; > - mca_oob_tcp_component.listen_thread_tv.tv_sec = 0; > - mca_oob_tcp_component.listen_thread_tv.tv_usec = 300000; > + mca_oob_tcp_component.listen_thread_tv.tv_sec = 3600; > + mca_oob_tcp_component.listen_thread_tv.tv_usec = 0; > } > mca_oob_tcp_component.addr_count = 0; > OBJ_CONSTRUCT(&mca_oob_tcp_component.modules, opal_pointer_array_t); > @@ -659,7 +659,10 @@ > ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); > > if (ORTE_PROC_IS_HNP && mca_oob_tcp_component.listen_thread_active) { > - mca_oob_tcp_component.listen_thread_active = 0; > + mca_oob_tcp_component.listen_thread_active = false; > + /* tell the thread to exit */ > + write(mca_oob_tcp_component.stop_thread, &i, sizeof(int)); > + opal_thread_join(&mca_oob_tcp_component.listen_thread, NULL); > } > > while (NULL != (item = > opal_list_remove_first(&mca_oob_tcp_component.listeners))) { > > 
Modified: trunk/orte/mca/oob/tcp/oob_tcp_component.h > ============================================================================== > --- trunk/orte/mca/oob/tcp/oob_tcp_component.h Sat Apr 12 10:43:20 > 2014 (r31375) > +++ trunk/orte/mca/oob/tcp/oob_tcp_component.h 2014-04-12 11:01:24 EDT > (Sat, 12 Apr 2014) (r31376) > @@ -12,6 +12,7 @@ > * Copyright (c) 2006-2013 Los Alamos National Security, LLC. > * All rights reserved. > * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. > + * Copyright (c) 2014 Intel, Inc. All rights reserved > * $COPYRIGHT$ > * > * Additional copyrights may follow > @@ -77,6 +78,7 @@ > opal_thread_t listen_thread; /**< handle to the listening > thread */ > bool listen_thread_active; > struct timeval listen_thread_tv; /**< Timeout when using listen > thread */ > + int stop_thread; /**< file descriptor used to > exit the listen thread */ > > /* peers available via this transport - the index is the process name, > * and the pointer returned is the pointer to the last module that > > Modified: trunk/orte/mca/oob/tcp/oob_tcp_listener.c > ============================================================================== > --- trunk/orte/mca/oob/tcp/oob_tcp_listener.c Sat Apr 12 10:43:20 2014 > (r31375) > +++ trunk/orte/mca/oob/tcp/oob_tcp_listener.c 2014-04-12 11:01:24 EDT (Sat, > 12 Apr 2014) (r31376) > @@ -121,11 +121,11 @@ > } > #endif > > - /* if I am the HNP, create a separate event base for the > - * listening thread so we can harvest connection requests > - * as rapidly as possible > + /* if I am the HNP, start a listening thread so we can > + * harvest connection requests as rapidly as possible > */ > if (ORTE_PROC_IS_HNP) { > + mca_oob_tcp_component.stop_thread = open("/dev/null", O_RDWR); > mca_oob_tcp_component.listen_thread_active = true; > mca_oob_tcp_component.listen_thread.t_run = listen_thread; > mca_oob_tcp_component.listen_thread.t_arg = NULL; > @@ -641,15 +641,22 @@ > FD_SET(listener->sd, &readfds); > max = 
(listener->sd > max) ? listener->sd : max; > } > + /* add the stop_thread fd */ > + FD_SET(mca_oob_tcp_component.stop_thread, &readfds); > + max = (mca_oob_tcp_component.stop_thread > max) ? > mca_oob_tcp_component.stop_thread : max; > + > /* set timeout interval */ > timeout.tv_sec = mca_oob_tcp_component.listen_thread_tv.tv_sec; > timeout.tv_usec = mca_oob_tcp_component.listen_thread_tv.tv_usec; > > - /* Block in a select for a short (10ms) amount of time to > - * avoid hammering the cpu. If a connection > + /* Block in a select to avoid hammering the cpu. If a connection > * comes in, we'll get woken up right away. > */ > rc = select(max + 1, &readfds, NULL, NULL, &timeout); > + if (!mca_oob_tcp_component.listen_thread_active) { > + /* we've been asked to terminate */ > + return NULL; > + } > if (rc < 0) { > if (EAGAIN != opal_socket_errno && EINTR != opal_socket_errno) { > perror("select"); > _______________________________________________ > svn mailing list > s...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/svn