Isn’t a socket or a pipe the right way to address this issue?

  George.

On Apr 12, 2014, at 11:01 , svn-commit-mai...@open-mpi.org wrote:

> Author: rhc (Ralph Castain)
> Date: 2014-04-12 11:01:24 EDT (Sat, 12 Apr 2014)
> New Revision: 31376
> URL: https://svn.open-mpi.org/trac/ompi/changeset/31376
> 
> Log:
> Ensure we properly terminate the listening thread prior to exiting, but do so 
> in a way that doesn't make us wait for select to timeout.
> 
> Refs #4510
> 
> Text files modified: 
>   trunk/orte/mca/oob/tcp/oob_tcp_component.c |     9 ++++++---
>   trunk/orte/mca/oob/tcp/oob_tcp_component.h |     2 ++
>   trunk/orte/mca/oob/tcp/oob_tcp_listener.c  |    17 ++++++++++++-----
>   3 files changed, 20 insertions(+), 8 deletions(-)
> 
> Modified: trunk/orte/mca/oob/tcp/oob_tcp_component.c
> ==============================================================================
> --- trunk/orte/mca/oob/tcp/oob_tcp_component.c        Sat Apr 12 10:43:20 2014        (r31375)
> +++ trunk/orte/mca/oob/tcp/oob_tcp_component.c        2014-04-12 11:01:24 EDT (Sat, 12 Apr 2014)      (r31376)
> @@ -140,8 +140,8 @@
>     if (ORTE_PROC_IS_HNP) {
>         OBJ_CONSTRUCT(&mca_oob_tcp_component.listen_thread, opal_thread_t);
>         mca_oob_tcp_component.listen_thread_active = false;
> -        mca_oob_tcp_component.listen_thread_tv.tv_sec = 0;
> -        mca_oob_tcp_component.listen_thread_tv.tv_usec = 300000;
> +        mca_oob_tcp_component.listen_thread_tv.tv_sec = 3600;
> +        mca_oob_tcp_component.listen_thread_tv.tv_usec = 0;
>     }
>     mca_oob_tcp_component.addr_count = 0;
>     OBJ_CONSTRUCT(&mca_oob_tcp_component.modules, opal_pointer_array_t);
> @@ -659,7 +659,10 @@
>                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
> 
>     if (ORTE_PROC_IS_HNP && mca_oob_tcp_component.listen_thread_active) {
> -        mca_oob_tcp_component.listen_thread_active = 0;
> +        mca_oob_tcp_component.listen_thread_active = false;
> +        /* tell the thread to exit */
> +        write(mca_oob_tcp_component.stop_thread, &i, sizeof(int));
> +        opal_thread_join(&mca_oob_tcp_component.listen_thread, NULL);
>     }
> 
>     while (NULL != (item = opal_list_remove_first(&mca_oob_tcp_component.listeners))) {
> 
> Modified: trunk/orte/mca/oob/tcp/oob_tcp_component.h
> ==============================================================================
> --- trunk/orte/mca/oob/tcp/oob_tcp_component.h        Sat Apr 12 10:43:20 2014        (r31375)
> +++ trunk/orte/mca/oob/tcp/oob_tcp_component.h        2014-04-12 11:01:24 EDT (Sat, 12 Apr 2014)      (r31376)
> @@ -12,6 +12,7 @@
>  * Copyright (c) 2006-2013 Los Alamos National Security, LLC. 
>  *                         All rights reserved.
>  * Copyright (c) 2010-2011 Cisco Systems, Inc.  All rights reserved.
> + * Copyright (c) 2014      Intel, Inc. All rights reserved
>  * $COPYRIGHT$
>  * 
>  * Additional copyrights may follow
> @@ -77,6 +78,7 @@
>     opal_thread_t      listen_thread;          /**< handle to the listening thread */
>     bool               listen_thread_active;
>     struct timeval     listen_thread_tv;       /**< Timeout when using listen thread */
> +    int                stop_thread;            /**< file descriptor used to exit the listen thread */
> 
>     /* peers available via this transport - the index is the process name,
>      * and the pointer returned is the pointer to the last module that
> 
> Modified: trunk/orte/mca/oob/tcp/oob_tcp_listener.c
> ==============================================================================
> --- trunk/orte/mca/oob/tcp/oob_tcp_listener.c Sat Apr 12 10:43:20 2014        (r31375)
> +++ trunk/orte/mca/oob/tcp/oob_tcp_listener.c 2014-04-12 11:01:24 EDT (Sat, 12 Apr 2014)      (r31376)
> @@ -121,11 +121,11 @@
>     }
> #endif
> 
> -    /* if I am the HNP, create a separate event base for the
> -     * listening thread so we can harvest connection requests
> -     * as rapidly as possible
> +    /* if I am the HNP, start a listening thread so we can
> +     * harvest connection requests as rapidly as possible
>      */
>     if (ORTE_PROC_IS_HNP) {
> +        mca_oob_tcp_component.stop_thread = open("/dev/null", O_RDWR);
>         mca_oob_tcp_component.listen_thread_active = true;
>         mca_oob_tcp_component.listen_thread.t_run = listen_thread;
>         mca_oob_tcp_component.listen_thread.t_arg = NULL;
> @@ -641,15 +641,22 @@
>             FD_SET(listener->sd, &readfds);
>             max = (listener->sd > max) ? listener->sd : max;
>         }
> +        /* add the stop_thread fd */
> +        FD_SET(mca_oob_tcp_component.stop_thread, &readfds);
> +        max = (mca_oob_tcp_component.stop_thread > max) ? mca_oob_tcp_component.stop_thread : max;
> +
>         /* set timeout interval */
>         timeout.tv_sec = mca_oob_tcp_component.listen_thread_tv.tv_sec;
>         timeout.tv_usec = mca_oob_tcp_component.listen_thread_tv.tv_usec;
> 
> -        /* Block in a select for a short (10ms) amount of time to
> -         * avoid hammering the cpu.  If a connection
> +        /* Block in a select to avoid hammering the cpu.  If a connection
>          * comes in, we'll get woken up right away.
>          */
>         rc = select(max + 1, &readfds, NULL, NULL, &timeout);
> +        if (!mca_oob_tcp_component.listen_thread_active) {
> +            /* we've been asked to terminate */
> +            return NULL;
> +        }
>         if (rc < 0) {
>             if (EAGAIN != opal_socket_errno && EINTR != opal_socket_errno) {
>                 perror("select");
> _______________________________________________
> svn mailing list
> s...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/svn

Reply via email to