Before this change, the following unneeded pthread calls were made even when sorting with a single thread:
$ seq 1e6 | ltrace -c -e 'pthread*' sort --parallel=1 | wc -l
% time     seconds  usecs/call     calls function
------ ----------- ----------- --------- --------------------
 39.13    0.031757          67       468 pthread_mutex_lock
 37.96    0.030811          65       468 pthread_mutex_unlock
 13.17    0.010691          65       162 pthread_cond_signal
  2.15    0.001747          64        27 pthread_mutex_destroy
  2.00    0.001620          60        27 pthread_mutex_init
  0.70    0.000565          62         9 pthread_cond_destroy
  0.64    0.000518          57         9 pthread_cond_init
------ ----------- ----------- --------- --------------------
100.00    0.081159                  1170 total
* src/sort.c (sort): Avoid merge tree overhead when single-threaded.
---
src/sort.c | 26 ++++++++++++++++++--------
1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/src/sort.c b/src/sort.c
index 05d00cc11..ed16c2217 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -4189,16 +4189,26 @@ sort (char *const *files, size_t nfiles, char const *output_file,
}
if (1 < buf.nlines)
{
- struct merge_node_queue queue;
- queue_init (&queue, nthreads);
- struct merge_node *merge_tree =
- merge_tree_init (nthreads, buf.nlines, line);
+ if (nthreads > 1)
+ {
+ struct merge_node_queue queue;
+ queue_init (&queue, nthreads);
+ struct merge_node *merge_tree =
+ merge_tree_init (nthreads, buf.nlines, line);
- sortlines (line, nthreads, buf.nlines, merge_tree + 1,
- &queue, tfp, temp_output);
+ sortlines (line, nthreads, buf.nlines, merge_tree + 1,
+ &queue, tfp, temp_output);
- merge_tree_destroy (nthreads, merge_tree);
- queue_destroy (&queue);
+ merge_tree_destroy (nthreads, merge_tree);
+ queue_destroy (&queue);
+ }
+ else
+ {
+ sequential_sort (line, buf.nlines,
+ line - buf.nlines, false);
+ for (size_t i = 0; i < buf.nlines; i++)
+ write_unique (line - i - 1, tfp, temp_output);
+ }
}
else
write_unique (line - 1, tfp, temp_output);
--
2.51.1