Author: tfiala Date: Mon Sep 26 15:25:47 2016 New Revision: 282436 URL: http://llvm.org/viewvc/llvm-project?rev=282436&view=rev Log: added Linux support for test timeout sampling
This is the Linux counterpart to the sampling support I added on the macOS side. This change also introduces zip-file compression if the size of the sample output is greater than 10 KB. The Linux side can be quite large and the textual content is averaging over a 10x compression factor on tests that I force to time out. When compression takes place, the filename becomes: {session_dir}/{TestFilename.py}-{pid}.sample.zip This support relies on the linux 'perf' tool. If it isn't present, the behavior is to ignore pre-kill processing of the timed out test process. Note calling the perf tool under the timeout command appears to nuke the profiled process. This was causing the timeout kill logic to fail due to the process having disappeared. I modified the kill logic to catch the case of the process not existing, and I have it ignore the kill request in that case. Any other exception is still raised. Reviewers: labath Subscribers: lldb-commits Differential Revision: https://reviews.llvm.org/D24890 Added: lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/linux.py lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/tests/test_linux.py - copied, changed from r282432, lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/tests/test_darwin.py Modified: lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/tests/test_darwin.py lldb/trunk/packages/Python/lldbsuite/test/dosep.py lldb/trunk/packages/Python/lldbsuite/test/test_runner/process_control.py Added: lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/linux.py URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/linux.py?rev=282436&view=auto ============================================================================== --- lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/linux.py (added) +++ lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/linux.py Mon Sep 26 15:25:47 2016 @@ -0,0 +1,76 @@ +"""Provides a pre-kill method to run on Linux. 
+ +This timeout pre-kill method relies on the Linux perf-tools +distribution. The appropriate way to obtain this set of tools +will depend on the Linux distribution. + +For Ubuntu 16.04, invoke the following command: +sudo apt-get install perf-tools-unstable +""" +from __future__ import print_function + +# system imports +import os +import subprocess +import sys +import tempfile + + +def do_pre_kill(process_id, runner_context, output_stream, sample_time=3): + """Samples the given process id, and puts the output to output_stream. + + @param process_id the local process to sample. + + @param runner_context a dictionary of details about the architectures + and platform on which the given process is running. Expected keys are + archs (array of architectures), platform_name, platform_url, and + platform_working_dir. + + @param output_stream file-like object that should be used to write the + results of sampling. + + @param sample_time specifies the time in seconds that should be captured. + """ + + # Validate args. + if runner_context is None: + raise Exception("runner_context argument is required") + if not isinstance(runner_context, dict): + raise Exception("runner_context argument must be a dictionary") + + # We will try to run sample on the local host only if there is no URL + # to a remote. + if "platform_url" in runner_context and ( + runner_context["platform_url"] is not None): + import pprint + sys.stderr.write( + "warning: skipping timeout pre-kill sample invocation because we " + "don't know how to run on a remote yet. runner_context={}\n" + .format(pprint.pformat(runner_context))) + + # We're going to create a temp file, and immediately overwrite it with the + # following command. This just ensures we don't have any races in + # creation of the temporary sample file.
+ fileno, filename = tempfile.mkstemp(suffix='perfdata') + os.close(fileno) + fileno = None + + try: + with open(os.devnull, 'w') as devnull: + returncode = subprocess.call(['timeout', str(sample_time), 'perf', + 'record', '-g', '-o', filename, '-p', str(process_id)], + stdout=devnull, stderr=devnull) + if returncode == 0 or returncode == 124: + # This is okay - this is the timeout return code, which is totally + # expected. + pass + else: + raise Exception("failed to call 'perf record .., error: {}".format( + returncode)) + + with open(os.devnull, 'w') as devnull: + output = subprocess.check_output(['perf', 'report', '--call-graph', + '--stdio', '-i', filename], stderr=devnull) + output_stream.write(output) + finally: + os.remove(filename) Modified: lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/tests/test_darwin.py URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/tests/test_darwin.py?rev=282436&r1=282435&r2=282436&view=diff ============================================================================== --- lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/tests/test_darwin.py (original) +++ lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/tests/test_darwin.py Mon Sep 26 15:25:47 2016 @@ -38,7 +38,7 @@ class DarwinPreKillTestCase(TestCase): print("parent: sending shut-down request to child") if self.process: self.child_work_queue.put("hello, child") - self.process.join() + self.process.join() if self.verbose: print("parent: child is fully shut down") Copied: lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/tests/test_linux.py (from r282432, lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/tests/test_darwin.py) URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/tests/test_linux.py?p2=lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/tests/test_linux.py&p1=lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/tests/test_darwin.py&r1=282432&r2=282436&rev=282436&view=diff 
============================================================================== --- lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/tests/test_darwin.py (original) +++ lldb/trunk/packages/Python/lldbsuite/pre_kill_hook/tests/test_linux.py Mon Sep 26 15:25:47 2016 @@ -1,16 +1,25 @@ -"""Test the pre-kill hook on Darwin.""" +"""Test the pre-kill hook on Linux.""" from __future__ import print_function # system imports from multiprocessing import Process, Queue import platform import re +import subprocess from unittest import main, TestCase # third party from six import StringIO +def do_child_thread(): + import os + x = 0 + while True: + x = x + 42 * os.getpid() + return x + + def do_child_process(child_work_queue, parent_work_queue, verbose): import os @@ -18,6 +27,14 @@ def do_child_process(child_work_queue, p if verbose: print("child: pid {} started, sending to parent".format(pid)) parent_work_queue.put(pid) + + # Spin up a daemon thread to do some "work", which will show + # up in a sample of this process. + import threading + worker = threading.Thread(target=do_child_thread) + worker.daemon = True + worker.start() + if verbose: print("child: waiting for shut-down request from parent") child_work_queue.get() @@ -25,27 +42,38 @@ def do_child_process(child_work_queue, p print("child: received shut-down request. Child exiting.") -class DarwinPreKillTestCase(TestCase): +class LinuxPreKillTestCase(TestCase): def __init__(self, methodName): - super(DarwinPreKillTestCase, self).__init__(methodName) + super(LinuxPreKillTestCase, self).__init__(methodName) self.process = None self.child_work_queue = None self.verbose = False + # self.verbose = True def tearDown(self): if self.verbose: print("parent: sending shut-down request to child") if self.process: self.child_work_queue.put("hello, child") - self.process.join() + self.process.join() if self.verbose: print("parent: child is fully shut down") def test_sample(self): # Ensure we're Darwin. 
- if platform.system() != 'Darwin': - self.skipTest("requires a Darwin-based OS") + if platform.system() != 'Linux': + self.skipTest("requires a Linux-based OS") + + # Ensure we have the 'perf' tool. If not, skip the test. + try: + perf_version = subprocess.check_output(["perf", "version"]) + if perf_version is None or not ( + perf_version.startswith("perf version")): + raise Exception("The perf executable doesn't appear" + " to be the Linux perf tools perf") + except Exception: + self.skipTest("requires the Linux perf tools 'perf' command") # Start the child process. self.child_work_queue = Queue() @@ -63,7 +91,7 @@ class DarwinPreKillTestCase(TestCase): child_pid = parent_work_queue.get() # Sample the child process. - from darwin import do_pre_kill + from linux import do_pre_kill context_dict = { "archs": [platform.machine()], "platform_name": None, @@ -81,26 +109,24 @@ class DarwinPreKillTestCase(TestCase): print("parent: do_pre_kill() wrote the following output:", output) self.assertIsNotNone(output) - # We should have a line with: - # Process: .* [{pid}] - process_re = re.compile(r"Process:[^[]+\[([^]]+)\]") - match = process_re.search(output) - self.assertIsNotNone(match, "should have found process id for " - "sampled process") - self.assertEqual(1, len(match.groups())) - self.assertEqual(child_pid, int(match.group(1))) - - # We should see a Call graph: section. - callgraph_re = re.compile(r"Call graph:") - match = callgraph_re.search(output) - self.assertIsNotNone(match, "should have found the Call graph section" + # We should have a samples count entry. 
+ # Samples: + self.assertTrue("Samples:" in output, "should have found a 'Samples:' " + "field in the sampled process output") + + # We should see an event count entry + event_count_re = re.compile(r"Event count[^:]+:\s+(\d+)") + match = event_count_re.search(output) + self.assertIsNotNone(match, "should have found the event count entry " "in sample output") + if self.verbose: + print("cpu-clock events:", match.group(1)) - # We should see a Binary Images: section. - binary_images_re = re.compile(r"Binary Images:") - match = binary_images_re.search(output) - self.assertIsNotNone(match, "should have found the Binary Images " - "section in sample output") + # We should see some percentages in the file. + percentage_re = re.compile(r"\d+\.\d+%") + match = percentage_re.search(output) + self.assertIsNotNone(match, "should have found at least one percentage " + "in the sample output") if __name__ == "__main__": Modified: lldb/trunk/packages/Python/lldbsuite/test/dosep.py URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/packages/Python/lldbsuite/test/dosep.py?rev=282436&r1=282435&r2=282436&view=diff ============================================================================== --- lldb/trunk/packages/Python/lldbsuite/test/dosep.py (original) +++ lldb/trunk/packages/Python/lldbsuite/test/dosep.py Mon Sep 26 15:25:47 2016 @@ -243,7 +243,7 @@ class DoTestProcessDriver(process_contro except ImportError: # We don't have one for this platform. Skip. sys.stderr.write("\nwarning: no timeout handler module: " + - module_name) + module_name + "\n") return # Try to run the pre-kill-hook method. @@ -254,13 +254,26 @@ class DoTestProcessDriver(process_contro # Write the output to a filename associated with the test file and # pid. 
+ MAX_UNCOMPRESSED_BYTE_COUNT = 10 * 1024 + + content = output_io.getvalue() + compress_output = len(content) > MAX_UNCOMPRESSED_BYTE_COUNT basename = "{}-{}.sample".format(self.file_name, self.pid) sample_path = os.path.join(g_session_dir, basename) - with open(sample_path, "w") as output_file: - output_file.write(output_io.getvalue()) + + if compress_output: + # Write compressed output into a .zip file. + from zipfile import ZipFile, ZIP_DEFLATED + zipfile = sample_path + ".zip" + with ZipFile(zipfile, "w", ZIP_DEFLATED) as sample_zip: + sample_zip.writestr(basename, content) + else: + # Write raw output into a text file. + with open(sample_path, "w") as output_file: + output_file.write(content) except Exception as e: sys.stderr.write("caught exception while running " - "pre-kill action: {}".format(e)) + "pre-kill action: {}\n".format(e)) return def is_exceptional_exit(self): Modified: lldb/trunk/packages/Python/lldbsuite/test/test_runner/process_control.py URL: http://llvm.org/viewvc/llvm-project/lldb/trunk/packages/Python/lldbsuite/test/test_runner/process_control.py?rev=282436&r1=282435&r2=282436&view=diff ============================================================================== --- lldb/trunk/packages/Python/lldbsuite/test/test_runner/process_control.py (original) +++ lldb/trunk/packages/Python/lldbsuite/test/test_runner/process_control.py Mon Sep 26 15:25:47 2016 @@ -360,18 +360,28 @@ class UnixProcessHelper(ProcessHelper): # Choose kill mechanism based on whether we're targeting # a process group or just a process. 
- if popen_process.using_process_groups: - # if log_file: - # log_file.write( - # "sending signum {} to process group {} now\n".format( - # signum, popen_process.pid)) - os.killpg(popen_process.pid, signum) - else: - # if log_file: - # log_file.write( - # "sending signum {} to process {} now\n".format( - # signum, popen_process.pid)) - os.kill(popen_process.pid, signum) + try: + if popen_process.using_process_groups: + # if log_file: + # log_file.write( + # "sending signum {} to process group {} now\n".format( + # signum, popen_process.pid)) + os.killpg(popen_process.pid, signum) + else: + # if log_file: + # log_file.write( + # "sending signum {} to process {} now\n".format( + # signum, popen_process.pid)) + os.kill(popen_process.pid, signum) + except OSError as error: + import errno + if error.errno == errno.ESRCH: + # This is okay - failed to find the process. It may be that + # the timeout pre-kill hook eliminated the process. We'll + # ignore. + pass + else: + raise def soft_terminate(self, popen_process, log_file=None, want_core=True): # Choose signal based on desire for core file. _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits