On Sat, Oct 29, 2016 at 1:48 PM, Brent W. Baccala <cos...@freesoft.org> wrote:
> Yes, patch7 without patch8 can reorder messages as they're being resent, > but patch8 uses the mutex introduced in patch7, so the ordering can't be > reversed. > > They could be combined into a single patch. Do you want me to prepare a > combined patch and email it in? > OK, I'm attaching the combined patch7+patch8. I've studied it a bit more, and it has a race condition that looks unavoidable to me. rpctrace on ext2fs exposes the problem. ext2fs maps a region of memory that it manages itself (using libpager), then calls vm_copy() on that region, using its own task port. This causes the kernel to make a memory_object_data_request back to ext2fs, which makes some more requests (mach_port_deallocate and vm_allocate) on the task port before replying with a memory_object_data_supply, which allows the original vm_copy to return. So, it's calling vm_copy on its task port, and needs to do some more operations on the task port before the vm_copy completes. And just like the problem I described two months ago ("mach_msg blocking on call to vm_map"), the mach_msg call sending the vm_copy will block even if you're not waiting for the reply message. So... in rpctrace, I want to signal the condition variable (allowing the next message to be processed) after the message has been sent, but since mach_msg blocks, I can't do that without blocking the entire task port, which deadlocks the process. My solution is to signal the condition variable right before the call to mach_msg, but this creates a race condition where messages can get reordered. As you know, I never liked this blocking behavior of mach_msg, but I just can't see any way around it now. If you can suggest something, let me know... agape brent
From f12c28e9b16d7a3c628586b739eee3f4e004f753 Mon Sep 17 00:00:00 2001 From: Brent Baccala <cos...@freesoft.org> Date: Sun, 30 Oct 2016 16:13:34 -1000 Subject: [PATCH] multithread rpctrace to avoid deadlocks in the kernel --- utils/rpctrace.c | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/utils/rpctrace.c b/utils/rpctrace.c index c01f8d4..72ca614 100644 --- a/utils/rpctrace.c +++ b/utils/rpctrace.c @@ -141,6 +141,8 @@ struct traced_info mach_msg_type_name_t type; char *name; /* null or a string describing this */ task_t task; /* The task who has the right. */ + mach_port_seqno_t seqno; /* next RPC to be processed on this port */ + pthread_cond_t sequencer; /* used to sequence RPCs when they are processed out-of-order */ }; /* Each traced port has one receiver info and multiple send wrappers. @@ -250,6 +252,8 @@ struct port_class *other_class; struct port_bucket *traced_bucket; FILE *ostream; +pthread_mutex_t tracelock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP; + /* These are the calls made from the tracing engine into the output formatting code. */ @@ -334,9 +338,13 @@ destroy_receiver_info (struct receiver_info *info) while (send_wrapper) { struct sender_info *next = send_wrapper->next; +#if 0 + if (refcounts_hard_references (&TRACED_INFO (send_wrapper)->pi.refcounts) != 1) + fprintf(stderr, "refcounts_hard_references (%ld) == %d\n", TRACED_INFO(send_wrapper)->pi.port_right, refcounts_hard_references (&TRACED_INFO (send_wrapper)->pi.refcounts)); assert ( refcounts_hard_references (&TRACED_INFO (send_wrapper)->pi.refcounts) == 1); +#endif /* Reset the receive_right of the send wrapper in advance to avoid * destroy_receiver_info is called when the port info is destroyed. */ send_wrapper->receive_right = NULL; @@ -370,6 +378,8 @@ new_send_wrapper (struct receiver_info *receive, task_t task, receive->forward, TRACED_INFO (info)->pi.port_right, task2pid (task)); TRACED_INFO (info)->type = MACH_MSG_TYPE_MOVE_SEND; TRACED_INFO (info)->task = task; + TRACED_INFO (info)->seqno = 0; + pthread_cond_init(& TRACED_INFO(info)->sequencer, NULL); info->receive_right = receive; info->next = receive->next; receive->next = info; @@ -400,6 +410,8 @@ new_send_once_wrapper (mach_port_t right, mach_port_t *wrapper_right, task_t tas sizeof *info, &info); assert_perror (err); TRACED_INFO (info)->name = 0; + TRACED_INFO (info)->seqno = 0; + pthread_cond_init(& TRACED_INFO(info)->sequencer, NULL); } info->forward = right; @@ -451,6 +463,8 @@ traced_clean (void *pi) { struct sender_info *info = pi; + pthread_mutex_lock(&tracelock); + assert (TRACED_INFO (info)->type == MACH_MSG_TYPE_MOVE_SEND); free (TRACED_INFO (info)->name); @@ -466,6 +480,8 @@ traced_clean (void *pi) info->receive_right = NULL; } + + pthread_mutex_unlock(&tracelock); } /* Check if the receive right has been seen. */ @@ -1144,6 +1160,8 @@ trace_and_forward (mach_msg_header_t *inp, mach_msg_header_t *outp) assert (info); + pthread_mutex_lock(&tracelock); + /* A notification message from the kernel appears to have been sent with a send-once right, even if there have never really been any. */ if (MACH_MSGH_BITS_LOCAL (inp->msgh_bits) == MACH_MSG_TYPE_MOVE_SEND_ONCE) @@ -1170,6 +1188,8 @@ trace_and_forward (mach_msg_header_t *inp, mach_msg_header_t *outp) /* It might be a task port. Remove the dead task from the list. */ remove_task (n->not_port); + pthread_mutex_unlock(&tracelock); + return 1; } else if (inp->msgh_id == MACH_NOTIFY_NO_SENDERS @@ -1182,6 +1202,7 @@ trace_and_forward (mach_msg_header_t *inp, mach_msg_header_t *outp) ports_no_senders (info, n->not_count); ports_port_deref (info); ((mig_reply_header_t *) outp)->RetCode = MIG_NO_REPLY; + pthread_mutex_unlock(&tracelock); return 1; } /* Get some unexpected notification for rpctrace itself, @@ -1190,6 +1211,7 @@ trace_and_forward (mach_msg_header_t *inp, mach_msg_header_t *outp) { ports_port_deref (info); ((mig_reply_header_t *) outp)->RetCode = MIG_NO_REPLY; + pthread_mutex_unlock(&tracelock); return 1; } } @@ -1201,6 +1223,11 @@ trace_and_forward (mach_msg_header_t *inp, mach_msg_header_t *outp) msgid = msgid_info (inp->msgh_id); + while (inp->msgh_seqno != TRACED_INFO (info)->seqno) + { + pthread_cond_wait (& TRACED_INFO (info)->sequencer, &tracelock); + } + /* Determine message's destination task */ if (INFO_SEND_ONCE(info)) @@ -1364,6 +1391,16 @@ trace_and_forward (mach_msg_header_t *inp, mach_msg_header_t *outp) } } + /* Advance our sequence number and signal any other thread waiting + * to process the next message on this port. + */ + TRACED_INFO (info)->seqno = inp->msgh_seqno + 1; + pthread_cond_broadcast (& TRACED_INFO (info)->sequencer); + + /* Unlock prior to resending message to avoid deadlocks in the kernel */ + ports_port_deref (info); + pthread_mutex_unlock(&tracelock); + /* Resend the message to the tracee. */ err = mach_msg (inp, MACH_SEND_MSG, inp->msgh_size, 0, MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); @@ -1377,8 +1414,6 @@ trace_and_forward (mach_msg_header_t *inp, mach_msg_header_t *outp) else assert_perror (err); - ports_port_deref (info); - /* We already sent the message, so the server loop shouldn't do it again. */ ((mig_reply_header_t *) outp)->RetCode = MIG_NO_REPLY; @@ -1390,7 +1425,7 @@ static void * trace_thread_function (void *arg) { struct port_bucket *const bucket = arg; - ports_manage_port_operations_one_thread (bucket, trace_and_forward, 0); + ports_manage_port_operations_multithread (bucket, trace_and_forward, 0, 0, NULL); return 0; } -- 2.6.4