There is a rare race between setting and checking 'done': the signal
handler that sets 'done' can run after the main loop has checked 'done'
(and decided not to break out), but before the loop blocks in
evlist__poll(). In this case, the main loop won't wake up until either
another signal is sent, or the perf data fd causes a wakeup.

The following simple script can trigger this condition (but you might
need to run it for several hours):
for ((i = 0; i >= 0; i++)) ; do
  echo "Loop $i"
  delay=$(echo "scale=4; 0.1 * $RANDOM/32768" | bc)
  ./perf record -- sleep 30000000 >/dev/null&
  pid=$!
  sleep $delay
  kill -TERM $pid
  echo "PID $pid"
  wait $pid
done

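At some point, the loop will stall. Adding logging shows that, even
though perf has received the SIGTERM and set 'done = 1', it remains
sleeping until a second signal is sent.

Fix this by creating an eventfd, adding it to the evlist's pollfd set,
and writing to it from the signal handler, so that a signal arriving
just before evlist__poll() still causes the poll to return.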

Signed-off-by: Anand K Mistry <amis...@google.com>

---

 tools/perf/builtin-record.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 1ab349abe90469..ce5fc3860131d2 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -53,6 +53,7 @@
 #include <unistd.h>
 #include <sched.h>
 #include <signal.h>
+#include <sys/eventfd.h>
 #include <sys/mman.h>
 #include <sys/wait.h>
 #include <sys/types.h>
@@ -518,15 +519,19 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
 
 static volatile int signr = -1;
 static volatile int child_finished;
+static int done_fd = -1;
 
 static void sig_handler(int sig)
 {
+       u64 tmp = 1;
        if (sig == SIGCHLD)
                child_finished = 1;
        else
                signr = sig;
 
        done = 1;
+       if (write(done_fd, &tmp, sizeof(tmp)) < 0)
+               pr_err("failed to signal wakeup fd\n");
 }
 
 static void sigsegv_handler(int sig)
@@ -1424,6 +1429,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
        int fd;
        float ratio = 0;
 
+       done_fd = eventfd(0, EFD_NONBLOCK);
+       evlist__add_pollfd(rec->evlist, done_fd);
+
        atexit(record__sig_exit);
        signal(SIGCHLD, sig_handler);
        signal(SIGINT, sig_handler);
-- 
2.26.2.645.ge9eca65c58-goog

Reply via email to