Hi Professor Eggert, On Sun, Dec 5, 2010 at 11:01 PM, Paul Eggert <egg...@cs.ucla.edu> wrote: > On 12/05/2010 09:16 PM, Chen Guo wrote: >> Before saying anything else, I should note that for mutexes, on 4 >> threads 20% of the time there's a segfault on a seemingly innocuous >> line in queue_insert (): >> node->queued = true > > It does sound like mutexes are the way to go, and that this bug > needs to be fixed. I assume that this is the call to queue_insert > from queue_check_insert_parent? What's the backtrace? (And > what patch are you using, to get mutexes?) >
I've attached the patch (inlined at the bottom). Here's the GDB crash, with backtrace. I also printed node->queued in GDB, so it's evident that its accessible. (gdb) run --parallel 2 rec_1M > /dev/null Starting program: /data/chen/Coding/Coreutils/test/sort-mutex --parallel 2 rec_1M > /dev/null [Thread debugging using libthread_db enabled] [New Thread 0x7fffcbb95710 (LWP 19850)] Program received signal SIGSEGV, Segmentation fault. queue_insert (queue=0x7fffffffe0c0, node=0x7ffff7ddc560) at sort.c:3202 3202 node->queued = true; (gdb) bt #0 queue_insert (queue=0x7fffffffe0c0, node=0x7ffff7ddc560) at sort.c:3202 #1 0x0000000000405844 in queue_check_insert_parent (lines=<value optimized out>, dest=<value optimized out>, nthreads=<value optimized out>, total_lines=<value optimized out>, parent=<value optimized out>, lo_child=<value optimized out>, queue=0x7fffffffe0c0, tfp=0x7ffff7bb9780, temp_output=0x0) at sort.c:3340 #2 merge_loop (lines=<value optimized out>, dest=<value optimized out>, nthreads=<value optimized out>, total_lines=<value optimized out>, parent=<value optimized out>, lo_child=<value optimized out>, queue=0x7fffffffe0c0, tfp=0x7ffff7bb9780, temp_output=0x0) at sort.c:3374 #3 sortlines (lines=<value optimized out>, dest=<value optimized out>, nthreads=<value optimized out>, total_lines=<value optimized out>, parent=<value optimized out>, lo_child=<value optimized out>, queue=0x7fffffffe0c0, tfp=0x7ffff7bb9780, temp_output=0x0) at sort.c:3515 #4 0x0000000000405c2b in sortlines (lines=0x7ffff7344030, dest=<value optimized out>, nthreads=<value optimized out>, total_lines=<value optimized out>, parent=<value optimized out>, lo_child=<value optimized out>, queue=0x7fffffffe0c0, tfp=0x7ffff7bb9780, temp_output=0x0) at sort.c:3494 #5 0x00000000004095e8 in sort (argc=<value optimized out>, argv=<value optimized out>) at sort.c:3804 #6 main (argc=<value optimized out>, argv=<value optimized out>) at sort.c:4576 (gdb) print node $1 = (struct merge_node *) 0x7ffff7ddc560 (gdb) print node->queued $2 = false And here's the patch: >From 5a1d0b8f1a4c6d7cd99da7376f8c03f8a4cc2be9 Mon Sep 17 00:00:00 2001 From: Chen Guo <cheng...@ucla.edu> Date: Mon, 6 Dec 2010 00:15:42 -0800 Subject: [PATCH] sort: change spinlocks to mutexes. * src/sort.c: (struct merge_node) Change member lock to mutex. All uses changed. --- src/sort.c | 14 +++++++------- 1 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/sort.c b/src/sort.c index a4c2cbf..9cfc814 100644 --- a/src/sort.c +++ b/src/sort.c @@ -241,7 +241,7 @@ struct merge_node struct merge_node *parent; /* Parent node. */ unsigned int level; /* Level in merge tree. */ bool queued; /* Node is already in heap. */ - pthread_spinlock_t lock; /* Lock for node operations. */ + pthread_mutex_t lock; /* Lock for node operations. */ }; /* Priority queue of merge nodes. */ @@ -3157,7 +3157,7 @@ compare_nodes (void const *a, void const *b) static inline void lock_node (struct merge_node *node) { - pthread_spin_lock (&node->lock); + pthread_mutex_lock (&node->lock); } /* Unlock a merge tree NODE. */ @@ -3165,7 +3165,7 @@ lock_node (struct merge_node *node) static inline void unlock_node (struct merge_node *node) { - pthread_spin_unlock (&node->lock); + pthread_mutex_unlock (&node->lock); } /* Destroy merge QUEUE. */ @@ -3479,7 +3479,7 @@ sortlines (struct line *restrict lines, struct line *restrict dest, node.parent = parent; node.level = parent->level + 1; node.queued = false; - pthread_spin_init (&node.lock, PTHREAD_PROCESS_PRIVATE); + pthread_mutex_init (&node.lock, NULL); /* Calculate thread arguments. */ unsigned long int lo_threads = nthreads / 2; @@ -3515,7 +3515,7 @@ sortlines (struct line *restrict lines, struct line *restrict dest, merge_loop (queue, total_lines, tfp, temp_output); } - pthread_spin_destroy (&node.lock); + pthread_mutex_destroy (&node.lock); } /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is @@ -3799,12 +3799,12 @@ sort (char * const *files, size_t nfiles, char const *output_file, node.parent = NULL; node.level = MERGE_END; node.queued = false; - pthread_spin_init (&node.lock, PTHREAD_PROCESS_PRIVATE); + pthread_mutex_init (&node.lock, NULL); sortlines (line, line, nthreads, buf.nlines, &node, true, &queue, tfp, temp_output); queue_destroy (&queue); - pthread_spin_destroy (&node.lock); + pthread_mutex_destroy (&node.lock); } else write_unique (line - 1, tfp, temp_output); -- 1.7.1
From 5a1d0b8f1a4c6d7cd99da7376f8c03f8a4cc2be9 Mon Sep 17 00:00:00 2001 From: Chen Guo <cheng...@ucla.edu> Date: Mon, 6 Dec 2010 00:15:42 -0800 Subject: [PATCH] sort: change spinlocks to mutexes. * src/sort.c: (struct merge_node) Change member lock to mutex. All uses changed. --- src/sort.c | 14 +++++++------- 1 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/sort.c b/src/sort.c index a4c2cbf..9cfc814 100644 --- a/src/sort.c +++ b/src/sort.c @@ -241,7 +241,7 @@ struct merge_node struct merge_node *parent; /* Parent node. */ unsigned int level; /* Level in merge tree. */ bool queued; /* Node is already in heap. */ - pthread_spinlock_t lock; /* Lock for node operations. */ + pthread_mutex_t lock; /* Lock for node operations. */ }; /* Priority queue of merge nodes. */ @@ -3157,7 +3157,7 @@ compare_nodes (void const *a, void const *b) static inline void lock_node (struct merge_node *node) { - pthread_spin_lock (&node->lock); + pthread_mutex_lock (&node->lock); } /* Unlock a merge tree NODE. */ @@ -3165,7 +3165,7 @@ lock_node (struct merge_node *node) static inline void unlock_node (struct merge_node *node) { - pthread_spin_unlock (&node->lock); + pthread_mutex_unlock (&node->lock); } /* Destroy merge QUEUE. */ @@ -3479,7 +3479,7 @@ sortlines (struct line *restrict lines, struct line *restrict dest, node.parent = parent; node.level = parent->level + 1; node.queued = false; - pthread_spin_init (&node.lock, PTHREAD_PROCESS_PRIVATE); + pthread_mutex_init (&node.lock, NULL); /* Calculate thread arguments. */ unsigned long int lo_threads = nthreads / 2; @@ -3515,7 +3515,7 @@ sortlines (struct line *restrict lines, struct line *restrict dest, merge_loop (queue, total_lines, tfp, temp_output); } - pthread_spin_destroy (&node.lock); + pthread_mutex_destroy (&node.lock); } /* Scan through FILES[NTEMPS .. NFILES-1] looking for a file that is @@ -3799,12 +3799,12 @@ sort (char * const *files, size_t nfiles, char const *output_file, node.parent = NULL; node.level = MERGE_END; node.queued = false; - pthread_spin_init (&node.lock, PTHREAD_PROCESS_PRIVATE); + pthread_mutex_init (&node.lock, NULL); sortlines (line, line, nthreads, buf.nlines, &node, true, &queue, tfp, temp_output); queue_destroy (&queue); - pthread_spin_destroy (&node.lock); + pthread_mutex_destroy (&node.lock); } else write_unique (line - 1, tfp, temp_output); -- 1.7.1