Hi Folks:

With limited testing, the problem stops happening if uv_run() in the IO_Task() is forced to run long enough to finish its pending work. As an interim measure I do this by making the Protocol_Task() yield the CPU after calling uv_stop() and uv_poll_stop(), as shown in the RELEASE_CONNECTION() excerpt below, after the sketch. This appears to cause the IO_Task() to be scheduled and run, but I am not at all convinced it is a reliable technique.
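For comparison, here is a minimal sketch of what I understand to be the intended libuv pattern: never touch the Poll_Loop's handles from the Protocol_Task() at all, and instead signal the IO_Task() with uv_async_send(), which is documented as safe to call from any thread, so that uv_poll_stop() and uv_close() run on the loop's own thread. The Close_Async handle, the request_poll_close() routine and the mutex are hypothetical names, Close_Async is assumed to be initialized in the IO_Task() before it calls uv_run(), and I have not tested this against my code.

Sketch: Closing The Poll Handle Via uv_async_send()
---------------------------------------------------
#include <uv.h>
#include <stdlib.h>
#include <pthread.h>

extern uv_loop_t Poll_Loop;

static uv_async_t Close_Async;              // initialized on the Poll_Loop in the IO_Task()
static uv_poll_t *Pending_Close = NULL;     // handle waiting to be closed
static pthread_mutex_t Close_Mutex = PTHREAD_MUTEX_INITIALIZER;

static void close_callback(uv_handle_t *handle)
{
    free(handle);
}

//
// Runs on the IO_Task() thread, inside uv_run(&Poll_Loop, UV_RUN_DEFAULT).
//
static void close_async_callback(uv_async_t *async)
{
    uv_poll_t *victim;

    (void) async;

    pthread_mutex_lock(&Close_Mutex);
    victim = Pending_Close;
    Pending_Close = NULL;
    pthread_mutex_unlock(&Close_Mutex);

    if(victim)
    {
        uv_poll_stop(victim);
        uv_close((uv_handle_t *) victim, close_callback);
    }
}

//
// Called once in the IO_Task() before uv_run(&Poll_Loop, UV_RUN_DEFAULT).
//
void init_close_async(void)
{
    uv_async_init(&Poll_Loop, &Close_Async, close_async_callback);
}

//
// Called from the Protocol_Task(); it never touches the loop directly.
//
void request_poll_close(uv_poll_t *poll_handle)
{
    pthread_mutex_lock(&Close_Mutex);
    Pending_Close = poll_handle;
    pthread_mutex_unlock(&Close_Mutex);

    uv_async_send(&Close_Async);
}

If the Protocol_Task() really has to block until the close completes, close_callback() could still clear a WaitClose[]-style flag. The interim kludge currently in RELEASE_CONNECTION() is: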
//
// Deactivate and release the poll handle.
// You have to stop the Poll_Loop to deactivate and deallocate the poll handle.
//
uv_stop(&Poll_Loop);

uv_poll_stop(cdesc->poll_handle);

#ifdef CLOSE_KLUDGE2
//
// Try to let uv_run() in the IO_Task() finish pending work by yielding the CPU.
//
for(k = 0; k < 10; k++)
    pthread_yield();
#endif // CLOSE_KLUDGE2

uv_close((uv_handle_t *) cdesc->poll_handle, close_callback);

Best Regards,

Paul R.

On Sunday, December 20, 2020 at 10:13:34 AM UTC-8 pa...@rcom-software.com wrote:

> Hi Folks:
>
> I made some progress on the problem but it is definitely not solved. The updated code
> and more diagnostic information are included in this message.
>
> NOTE: I am using the GitHub distribution from the following link on Ubuntu Linux
> version 15.04.
>
> https://github.com/nikhilm/uvbook
>
> The Libuv software package appears to be version 1.3.0.
>
> I have had to take extraordinary measures to make connection release reliable.
> The relevant code is included near the end of this message and the extraordinary
> measures are in the CLOSE_KLUDGE sections. The difficulty arises because the
> Libuv loops are not used in the Protocol_Task(), yet it must affect operations
> on those loops to release handles. It would be nice if Libuv included an API
> for reliably releasing handles that could be called from any task.
>
> Connection release still fails about 15% of the time, in which case a crash occurs
> and the following diagnostic is displayed.
>
> pexd: src/unix/core.c:210: uv__finish_close: Assertion `!(handle->flags & UV_CLOSED)' failed.
>
> More diagnostic information follows. Do you know what causes this crash?
>
> Best Regards,
>
> Paul Romero
>
>
> Crash Diagnostics
> -----------------
> The crash occurs while uv_run() is executing in the IO_Task() in network_io.c,
> according to the following GDB stack trace.
>
> #0  0x00007f281754c267 in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:55
> #1  0x00007f281754deca in __GI_abort () at abort.c:89
> #2  0x00007f281754503d in __assert_fail_base (fmt=0x7f28176a7028 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n",
>     assertion=assertion@entry=0x41e093 "!(handle->flags & UV_CLOSED)", file=file@entry=0x41e068 "src/unix/core.c",
>     line=line@entry=210, function=function@entry=0x41e2b0 <__PRETTY_FUNCTION__.9522> "uv__finish_close") at assert.c:92
> #3  0x00007f28175450f2 in __GI___assert_fail (assertion=assertion@entry=0x41e093 "!(handle->flags & UV_CLOSED)",
>     file=file@entry=0x41e068 "src/unix/core.c", line=line@entry=210,
>     function=function@entry=0x41e2b0 <__PRETTY_FUNCTION__.9522> "uv__finish_close") at assert.c:101
> #4  0x000000000040c967 in uv__finish_close (handle=<optimized out>) at src/unix/core.c:210
> #5  uv__run_closing_handles (loop=0x638080 <Poll_Loop>) at src/unix/core.c:259
> #6  uv_run (loop=0x638080 <Poll_Loop>, mode=UV_RUN_DEFAULT) at src/unix/core.c:326
> #7  0x0000000000404962 in IO_Task (arg=0x0) at network_io.c:226
> #8  0x0000000000412ad7 in uv__thread_start (arg=<optimized out>) at src/unix/thread.c:49
> #9  0x00007f2817bf06aa in start_thread (arg=0x7f2816d15700) at pthread_create.c:333
> #10 0x00007f281761deed in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
>
> However, the GDB thread information indicates that RELEASE_CONNECTION(), in protocol.c,
> is executing in the Protocol_Task() when the crash occurs.
>
>   Id   Target Id                           Frame
>   6    Thread 0x7f2817516700 (LWP 3424)    syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
>   5    Thread 0x7f2816514700 (LWP 3426)    pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
>   4    Thread 0x7f2818003700 (LWP 3423)    syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
>   3    Thread 0x7f2815512700 (LWP 3428)    pthread_cond_wait@@GLIBC_2.3.2 () at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
>   2    Thread 0x7f2815d13700 (LWP 3427)    0x0000000000404500 in RELEASE_CONNECTION (cdesc=0x6384c0 <Conn_Desc_Table>) at protocol.c:357
> * 1    Thread 0x7f2816d15700 (LWP 3425)    0x00007f281754c267 in __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:55
>
> Line 357 of protocol.c is as follows.
>
> while(WaitClose[cdesc->index]);
>
> WaitClose[] is modified in only two places.
>
> 1) It is initialized to a handle address in RELEASE_CONNECTION() in the Protocol_Task().
> 2) It is cleared in the uv_close() callback routine close_callback().
>
>
> Code
> -----
>
> #define CLOSE_KLUDGE
>
> extern uv_loop_t Poll_Loop;
> extern uv_loop_t Connect_Loop;
>
> #ifdef CLOSE_KLUDGE
> uv_handle_t *WaitClose[MAX_CONN_DESC] = { NULL };
> #endif // CLOSE_KLUDGE
>
> ROUTINE void close_callback(uv_handle_t *handle)
> {
>     int k;
>
>     free(handle);
>
> #ifdef CLOSE_KLUDGE
>     //
>     // Determine if the handle is being closed.
>     //
>     for(k = 0; k < MAX_CONN_DESC; k++)
>     {
>         if(WaitClose[k] == handle)
>         {
>             //
>             // Closure is complete.
>             //
>             WaitClose[k] = NULL;
>             break;
>         }
>     }
> #endif // CLOSE_KLUDGE
>
>     return;
> }
>
> ROUTINE void RELEASE_CONNECTION(CONN_DESC *cdesc)
> {
>     uv_async_t as_handle;
>     struct linger spec;
>
>     if(N_Sockets > 0)
>         N_Sockets--;
>     //
>     // This causes immediate socket disconnection when it is closed.
>     //
>     spec.l_onoff = TRUE;
>     spec.l_linger = 0;
>     setsockopt(cdesc->fd, SOL_SOCKET, SO_LINGER, &spec, sizeof(spec));
>
>     if(cdesc->poll_handle)
>     {
> #ifdef CLOSE_KLUDGE
>         WaitClose[cdesc->index] = (uv_handle_t *) cdesc->poll_handle;
> #endif // CLOSE_KLUDGE
>         //
>         // Deactivate and release the poll handle.
>         // You have to stop the Poll_Loop to deactivate and deallocate the poll handle.
>         //
>         uv_stop(&Poll_Loop);
>
>         uv_poll_stop(cdesc->poll_handle);
>         uv_close((uv_handle_t *) cdesc->poll_handle, close_callback);
>         //
>         // Wake up the Poll_Loop in the IO_Task().
>         //
>         uv_async_init(&Poll_Loop, &as_handle, NULL);
>         uv_async_send(&as_handle);
>         uv_close((uv_handle_t *) &as_handle, NULL);
> #ifdef CLOSE_KLUDGE
>         //
>         // Wait for the handle to be closed and deallocated.
>         //
>         while(WaitClose[cdesc->index]);
> #endif // CLOSE_KLUDGE
>     }
>
>     if(cdesc->conn_handle)
>     {
> #ifdef CLOSE_KLUDGE
>         WaitClose[cdesc->index] = (uv_handle_t *) cdesc->conn_handle;
> #endif // CLOSE_KLUDGE
>         //
>         // Close and deallocate the connect handle in order to close the socket connection.
>         // You have to wake up the Connect_Loop for the close_callback() routine to execute.
>         //
>         uv_close((uv_handle_t *) cdesc->conn_handle, close_callback);
>         //
>         // Wake up the Connect_Loop in the main() process.
>         //
>         uv_async_init(&Connect_Loop, &as_handle, NULL);
>         uv_async_send(&as_handle);
>         uv_close((uv_handle_t *) &as_handle, NULL);
> #ifdef CLOSE_KLUDGE
>         //
>         // Wait for the handle and socket connection to be released and closed.
>         //
>         while(WaitClose[cdesc->index]);
> #endif // CLOSE_KLUDGE
>     }
>
>     ENTER_MUTEX(&Service_Q_Mutex);
>     DELETE_CONN(cdesc);
>     cdesc->fd = -1;
>     flush_msg(&cdesc->task_input_q);
>     EXIT_MUTEX(&Service_Q_Mutex);
>
>     return;
> }
>
> On Sunday, December 20, 2020 at 3:47:07 AM UTC-8 pa...@rcom-software.com wrote:
>
>> Hi Folks:
>>
>> My Libuv based Server performs all its functions correctly except for TCP
>> connection termination.
>>
>> Each TCP connection has a uv_tcp_t connection handle and a uv_poll_t handle, whose
>> allocation and operation are explained below. When the Protocol_Task() thread needs
>> to terminate a connection, it must stop polling, terminate the TCP socket connection,
>> and deallocate the handles.
>>
>> NOTE: I am using the GitHub distribution from the following link on Ubuntu Linux
>> version 15.04.
>>
>> https://github.com/nikhilm/uvbook
>>
>> I have tried the following two approaches.
>>
>> 1) Just use uv_poll_stop() to terminate polling and uv_close() to terminate the
>>    TCP connection.
>>
>> 2) Use uv_poll_stop() to terminate polling, and then use uv_queue_work() and
>>    uv_async_send() to wake up the Connect_Loop, in the main() process described
>>    below, so it can terminate the TCP connection, by proxy, with uv_close().
>>
>> In both cases the following problem occurs. The callback routine supplied to uv_close()
>> does not execute until another incoming TCP connection occurs, and in most cases the
>> Poll_Loop, in the IO_Task() described below, stops invoking its callback routine,
>> poll_callback(). In case 2, a crash almost always ensues. (I probably am not using
>> uv_async_send() correctly.)
>>
>> Do I have a fundamental misunderstanding of how Libuv works or am I doing
>> something wrong?
>>
>> Also, I strongly suspect using Linux recv() to read data is not optimal when epoll()
>> is being used. My understanding is that there is a way to pass buffers to epoll() such
>> that data will automatically be inserted in them when a UV_READABLE event occurs.
>> Do you have any advice about this? (A sketch of the kind of thing I mean follows the
>> architecture overview below.)
>>
>> An overview of my Server and the relevant code follow.
>>
>> Best Regards,
>>
>> Paul Romero
>>
>> Multi-Connection TCP Server Functional Architecture Overview
>> -------------------------------------------------------------
>> There is a connection descriptor for each incoming TCP connection which contains
>> all data needed to manage the connection and perform the relevant functions.
>>
>> When the main() process detects an incoming TCP connection, it sends a notification
>> message to the IO_Trigger_Task(). The IO_Trigger_Task() then sets up epoll() monitoring
>> of incoming TCP data for that connection.
>>
>> Subsequently, the IO_Task() invokes poll_callback() when incoming data is available,
>> reads a chunk of data, and sends a protocol message to the Protocol_Task() when a
>> complete protocol message is recognized.
>>
>> The Timer_Task() sends an expiration notification message to the Protocol_Task()
>> when a protocol timer expires.
>>
>> The Protocol_Task() sends messages to the Send_Op_Task() for transmission across the
>> network. It spawns a DB Operation Task to perform slow database operations, and the
>> DB Operation Task notifies the Protocol_Task() when the operation is complete and
>> then terminates.
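>>
>> Along those lines, if I understand the libuv stream API correctly, one alternative is
>> to drop the uv_poll_t/recv() combination and let libuv read into buffers supplied by an
>> allocation callback, via uv_read_start() on the uv_tcp_t handle. The following is a
>> rough, untested sketch: process_chunk() is a placeholder for my protocol parsing,
>> start_reading() is a hypothetical name, and the reads would then be driven by whichever
>> loop owns the uv_tcp_t handle (the Connect_Loop in my current design) rather than by
>> the Poll_Loop.
>>
>> Sketch: Reading With uv_read_start()
>> ------------------------------------
>> #include <uv.h>
>> #include <stdlib.h>
>>
>> extern void close_callback(uv_handle_t *handle);                           // as elsewhere in my code
>> extern void process_chunk(void *cdesc, const char *data, ssize_t nbytes);  // placeholder
>>
>> static void alloc_callback(uv_handle_t *handle, size_t suggested_size, uv_buf_t *buf)
>> {
>>     //
>>     // Supply a buffer for libuv to read into.
>>     //
>>     (void) handle;
>>     buf->base = malloc(suggested_size);
>>     buf->len = suggested_size;
>> }
>>
>> static void read_callback(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf)
>> {
>>     if(nread > 0)
>>     {
>>         //
>>         // nread bytes are already in buf->base; hand them to the protocol layer.
>>         //
>>         process_chunk(stream->data, buf->base, nread);
>>     }
>>     else if(nread < 0)
>>     {
>>         //
>>         // UV_EOF or an error: stop reading and close the handle.
>>         //
>>         uv_read_stop(stream);
>>         uv_close((uv_handle_t *) stream, close_callback);
>>     }
>>     free(buf->base);
>> }
>>
>> //
>> // Called once per accepted connection, e.g. after make_incoming_connection() accepts it.
>> //
>> void start_reading(uv_tcp_t *conn_handle, void *cdesc)
>> {
>>     conn_handle->data = cdesc;
>>     uv_read_start((uv_stream_t *) conn_handle, alloc_callback, read_callback);
>> }
>>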
>> Loops of type uv_loop_t
>> -----------------------
>> * Connect_Loop
>> * Poll_Loop
>> * Timer_Loop
>>
>> Tasks: All Libuv thread tasks run concurrently and are launched by main() at startup time.
>> -------------------------------------------------------------------------------------------
>> * main(): A Linux process that runs the Connect_Loop to detect incoming TCP connections.
>>   The make_incoming_connection() callback routine accepts incoming connections and
>>   allocates a uv_tcp_t handle on a per connection basis.
>>
>> * IO_Trigger_Task(): A Libuv thread that sets up epoll() plumbing for the IO_Task() when
>>   an incoming TCP connection occurs. It allocates a uv_poll_t handle, on a per connection
>>   basis, and calls uv_poll_start() to initiate epoll() operation with the Poll_Loop in the
>>   IO_Task(). It configures the handle to detect UV_READABLE events and handles them with
>>   the poll_callback() routine. However, it does not run the Poll_Loop. (Basically, this
>>   task just sets up plumbing; a sketch of that setup follows this list.)
>>
>> * IO_Task(): A Libuv thread that runs the Poll_Loop to handle incoming TCP data, on a per
>>   connection basis. The poll_callback() routine executes and uses normal Linux recv() to
>>   read chunks of data, in non-blocking mode, when a UV_READABLE event occurs.
>>
>> * Timer_Task(): A Libuv thread that runs the Timer_Loop to handle ticks, and whose main
>>   function is to detect protocol timer expiration. The tick duration is configured with
>>   uv_timer_init() and uv_timer_start(), and ticks are handled by the timer_callback()
>>   routine.
>>
>> * Protocol_Task(): A Libuv thread that handles protocol messages sent to it, on a per
>>   connection basis, by the following tasks: IO_Task(), Timer_Task(), and DB Operation
>>   Tasks. DB Operation Libuv thread tasks are spawned by the Protocol_Task() to perform
>>   slow database operations and send a notification message to the Protocol_Task() upon
>>   completion of the operation.
>>
>> * Send_Op_Task(): A Libuv thread that transmits all network bound messages with normal
>>   Linux send() on a per connection basis.
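>>
>> For reference, the plumbing set up by the IO_Trigger_Task() amounts to roughly the
>> following. This is a simplified sketch rather than my exact code: error checking is
>> omitted, setup_polling() is a hypothetical name, and uv_fileno() is just one way to
>> recover the accepted socket's file descriptor from the uv_tcp_t handle.
>>
>> Sketch: Per-Connection epoll() Plumbing
>> ---------------------------------------
>> #include <uv.h>
>> #include <stdlib.h>
>>
>> extern uv_loop_t Poll_Loop;
>> extern void poll_callback(uv_poll_t *handle, int status, int events);   // as in my code
>>
>> uv_poll_t *setup_polling(uv_tcp_t *conn_handle, void *cdesc)
>> {
>>     uv_os_fd_t fd;
>>     uv_poll_t *poll_handle;
>>
>>     //
>>     // Recover the accepted socket's file descriptor from the uv_tcp_t handle.
>>     //
>>     uv_fileno((uv_handle_t *) conn_handle, &fd);
>>
>>     //
>>     // Allocate the poll handle and register it with the Poll_Loop.
>>     //
>>     poll_handle = malloc(sizeof(uv_poll_t));
>>     uv_poll_init(&Poll_Loop, poll_handle, fd);
>>     poll_handle->data = cdesc;
>>
>>     //
>>     // Watch for readable events; poll_callback() runs on the IO_Task() thread when
>>     // uv_run(&Poll_Loop, UV_RUN_DEFAULT) processes the event.
>>     //
>>     uv_poll_start(poll_handle, UV_READABLE, poll_callback);
>>
>>     return poll_handle;
>> }
>>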
>> Approach 1 Code
>> ---------------
>> ROUTINE void close_callback(uv_handle_t *handle)
>> {
>>     free(handle);
>>     return;
>> }
>>
>> ROUTINE void RELEASE_CONNECTION(CONN_DESC *cdesc)
>> {
>>     struct linger spec;
>>     int r;
>>
>>     if(N_Sockets > 0)
>>         N_Sockets--;
>>
>>     if(cdesc->poll_handle)
>>     {
>>         uv_poll_stop(cdesc->poll_handle);
>>         free((void *) cdesc->poll_handle);
>>     }
>>
>>     if(cdesc->conn_handle)
>>     {
>>         struct linger spec;
>>
>>         spec.l_onoff = TRUE;
>>         spec.l_linger = 0;
>>         setsockopt(cdesc->fd, SOL_SOCKET, SO_LINGER, &spec, sizeof(spec));
>>
>>         uv_close((uv_handle_t *) cdesc->conn_handle, close_callback);
>>     }
>>
>>     ENTER_MUTEX(&Service_Q_Mutex);
>>     DELETE_CONN(cdesc);
>>     cdesc->fd = -1;
>>     flush_msg(&cdesc->task_input_q);
>>     EXIT_MUTEX(&Service_Q_Mutex);
>>
>>     return;
>> }
>>
>> Approach 2 Code
>> ---------------
>> ROUTINE void close_callback(uv_handle_t *handle)
>> {
>>     free(handle);
>>     return;
>> }
>>
>> typedef struct close_template {
>>     uv_handle_t *handle;
>>     void (*callback) (uv_handle_t *);
>> } CLOSE_TEMPLATE;
>>
>> ROUTINE void close_proxy(uv_work_t *data)
>> {
>>     //
>>     // The work request's data field carries the CLOSE_TEMPLATE.
>>     //
>>     CLOSE_TEMPLATE *cparam = (CLOSE_TEMPLATE *) data->data;
>>
>>     uv_close(cparam->handle, cparam->callback);
>>     return;
>> }
>>
>> extern uv_loop_t Connect_Loop;
>> static CLOSE_TEMPLATE close_data;
>>
>> ROUTINE void RELEASE_CONNECTION(CONN_DESC *cdesc)
>> {
>>     uv_work_t wreq;
>>     uv_async_t as_handle;
>>     struct linger spec;
>>
>>     if(N_Sockets > 0)
>>         N_Sockets--;
>>
>>     //
>>     // Stop this. TBD: Might need to do this via proxy in the IO_Task() Poll_Loop.
>>     //
>>     uv_poll_stop(cdesc->poll_handle);
>>
>>     uv_async_init(&Connect_Loop, &as_handle, NULL);
>>
>>     close_data.handle = (uv_handle_t *) cdesc->conn_handle;
>>     close_data.callback = close_callback;
>>     //
>>     // Call uv_close() in the close_proxy().
>>     //
>>     wreq.data = (void *) &close_data;
>>     uv_queue_work(&Connect_Loop, &wreq, close_proxy, NULL);
>>
>>     spec.l_onoff = TRUE;
>>     spec.l_linger = 0;
>>     setsockopt(cdesc->fd, SOL_SOCKET, SO_LINGER, &spec, sizeof(spec));
>>
>>     uv_async_send(&as_handle);
>>     uv_close((uv_handle_t *) &as_handle, NULL);
>>
>>     free(cdesc->poll_handle);
>>
>>     ENTER_MUTEX(&Service_Q_Mutex);
>>     DELETE_CONN(cdesc);
>>     cdesc->fd = -1;
>>     flush_msg(&cdesc->task_input_q);
>>     EXIT_MUTEX(&Service_Q_Mutex);
>>
>>     return;
>> }
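>>
>> A follow-up thought on Approach 2: if I read the uv_queue_work() documentation correctly,
>> the work callback (close_proxy() above) runs on a thread-pool thread, not on the
>> Connect_Loop thread, so uv_close() would still be called off the loop thread. The
>> after-work callback, on the other hand, is run by the loop itself, so moving the close
>> there might be closer to correct. The following rough, untested sketch does that;
>> queue_close() and noop_work() are hypothetical names, and the uv_work_t is heap
>> allocated because it must outlive the calling routine.
>>
>> Sketch: Approach 2 With uv_close() In The After-Work Callback
>> -------------------------------------------------------------
>> #include <uv.h>
>> #include <stdlib.h>
>>
>> typedef struct close_template {
>>     uv_handle_t *handle;
>>     void (*callback) (uv_handle_t *);
>> } CLOSE_TEMPLATE;
>>
>> static void noop_work(uv_work_t *req)
>> {
>>     //
>>     // Runs on a thread-pool thread; deliberately does nothing.
>>     //
>>     (void) req;
>> }
>>
>> static void close_after_work(uv_work_t *req, int status)
>> {
>>     //
>>     // Runs on the thread driving the loop passed to uv_queue_work(), so uv_close()
>>     // is called from the loop's own thread.
>>     //
>>     CLOSE_TEMPLATE *cparam = (CLOSE_TEMPLATE *) req->data;
>>
>>     (void) status;
>>     uv_close(cparam->handle, cparam->callback);
>>     free(cparam);
>>     free(req);
>> }
>>
>> void queue_close(uv_loop_t *loop, uv_handle_t *handle, void (*callback) (uv_handle_t *))
>> {
>>     uv_work_t *wreq = malloc(sizeof(uv_work_t));
>>     CLOSE_TEMPLATE *cparam = malloc(sizeof(CLOSE_TEMPLATE));
>>
>>     cparam->handle = handle;
>>     cparam->callback = callback;
>>     wreq->data = (void *) cparam;
>>     uv_queue_work(loop, wreq, noop_work, close_after_work);
>> }
>>
>> As far as I can tell, libuv wakes the target loop itself when queued work completes, so
>> the separate uv_async_init()/uv_async_send() dance may not be needed at all, but I have
>> not verified that.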