This patch:
* creates a new "checkers" top-level directory to hold harnesses for
  3rd-party code-checking tools.
* adds a "checker.py" Python module for use when implementing such
  harnesses.
3rd-party code-checking tools are expected to be passed command-line arguments by the frontend, and to return a JSON result on stdout; the job of each harness is to coerce the output from the tool into the expected JSON output format. The JSON format to be used is the "Firehose" serialization format: http://firehose.readthedocs.io/en/latest/index.html checkers/ChangeLog: * ChangeLog: New file. * checker.py: New file. --- checkers/ChangeLog | 9 ++ checkers/checker.py | 367 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 376 insertions(+) create mode 100644 checkers/ChangeLog create mode 100755 checkers/checker.py diff --git a/checkers/ChangeLog b/checkers/ChangeLog new file mode 100644 index 0000000..9189883 --- /dev/null +++ b/checkers/ChangeLog @@ -0,0 +1,9 @@ +2017-08-03 David Malcolm <dmalc...@redhat.com> + + * ChangeLog: New ChangeLog file. + +Copyright (C) 2017 Free Software Foundation, Inc. + +Copying and distribution of this file, with or without modification, +are permitted in any medium without royalty provided the copyright +notice and this notice are preserved. diff --git a/checkers/checker.py b/checkers/checker.py new file mode 100755 index 0000000..262bd72 --- /dev/null +++ b/checkers/checker.py @@ -0,0 +1,367 @@ +#!/usr/bin/env python +# Copyright 2012, 2013, 2015, 2017 David Malcolm <dmalc...@redhat.com> +# Copyright 2012, 2013, 2015, 2017 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see
# <http://www.gnu.org/licenses/>.

"""
A "checker" is an executable which takes GCC-style command-line
arguments and writes a Firehose JSON file to stdout.
"""

import json
import logging
import os
import re
import sys
import tempfile
import time
import traceback
import unittest

if sys.version_info[0] < 3:
    # http://pypi.python.org/pypi/subprocess32
    # so that we can use timeouts
    from subprocess32 import Popen, PIPE, STDOUT, TimeoutExpired
else:
    from subprocess import Popen, PIPE, STDOUT, TimeoutExpired

from firehose.model import Analysis, Generator, Metadata, Failure, \
    Location, File, Message, Issue, Trace, Stats

from gccinvocation import GccInvocation


def make_file(givenpath):
    """Return a firehose File for givenpath with no abspath or hash."""
    return File(givenpath=givenpath,
                abspath=None,
                hash_=None)


def make_stats(timer):
    """Return a firehose Stats holding the time elapsed on timer."""
    return Stats(wallclocktime=timer.get_elapsed_time())


def _to_text(data):
    """
    Return captured subprocess output as text.

    Bytes are decoded as UTF-8 with undecodable sequences replaced, so
    that a tool emitting arbitrary bytes cannot crash the harness.  None
    becomes the empty string; text is returned unchanged.
    """
    if data is None:
        return ''
    if isinstance(data, bytes):
        return data.decode('utf-8', 'replace')
    return data


class Timer(object):
    """
    Simple measurement of wallclock time taken
    """
    def __init__(self):
        self.starttime = time.time()

    def get_elapsed_time(self):
        """Get elapsed time in seconds as a float"""
        return time.time() - self.starttime

    def elapsed_time_as_str(self):
        """Get elapsed time as a string (with units)"""
        elapsed = self.get_elapsed_time()
        result = '%0.3f seconds' % elapsed
        if elapsed > 120:
            result += ' (%i minutes)' % int(elapsed / 60)
        return result


class Context(object):
    """
    Shared state for one checker run: the tool name, whether to log,
    whether exceptions are captured as Failure results, and the output
    streams in effect when the context was created.
    """
    def __init__(self, name, enable_logging=False, capture_exceptions=True):
        self.name = name
        self.enable_logging = enable_logging
        self.capture_exceptions = capture_exceptions
        if self.enable_logging:
            # Escape '%' so that the tool name can never be misread as
            # a logging format directive.
            format_ = ('%(asctime)s ' + name.replace('%', '%%')
                       + ': %(message)s')
            logging.basicConfig(format=format_,
                                level=logging.INFO,
                                stream=sys.stderr)
            self.log('logging initialized')

        self.stdout = sys.stdout
        self.stderr = sys.stderr
        self.returncode = None

    def log(self, msg):
        """Log msg at INFO level; a no-op unless logging is enabled."""
        if self.enable_logging:
            logging.info(msg)

    def write_streams(self, toolname, out, err):
        """
        Log each line of a tool's captured stdout and stderr.

        out and err may be bytes (as returned by Popen.communicate) or
        text; bytes are decoded first so that the log shows the text
        rather than a bytes repr.
        """
        for line in _to_text(out).splitlines():
            self.log('stdout from %r: %s\n' % (toolname, line))
        for line in _to_text(err).splitlines():
            self.log('stderr from %r: %s\n' % (toolname, line))


class SubprocessResult(object):
    """
    A bundle of information relating to a subprocess invocation.
    """
    def __init__(self, sourcefile, argv, returncode, out, err, timer):
        self.sourcefile = sourcefile
        self.argv = argv
        self.returncode = returncode
        self.out = out
        self.err = err
        self.timer = timer

    def set_custom_fields(self, analysis):
        """
        Record the return code and captured streams on analysis as the
        custom fields 'returncode', 'stdout' and 'stderr'.
        """
        analysis.set_custom_field('returncode', self.returncode)
        # Decode leniently: tool output is not guaranteed to be UTF-8.
        analysis.set_custom_field('stdout', _to_text(self.out))
        analysis.set_custom_field('stderr', _to_text(self.err))

    def __repr__(self):
        return 'SubprocessResult(%r, %r, %r, %r, %r, %r)' \
            % (self.sourcefile, self.argv, self.returncode,
               self.out, self.err, self.timer)


class Checker(object):
    """
    Base class for harnesses around 3rd-party code-checking tools.

    Subclasses implement raw_invoke (and, if they use _run_subprocess,
    handle_output).
    """
    def __init__(self, ctxt, timeout=60):
        """
        ctxt: the Context to log through; also supplies the tool name.
        timeout: seconds to wait for a subprocess before giving up.
        """
        self.name = ctxt.name
        self.timeout = timeout
        self.ctxt = ctxt

    def log(self, msg):
        """Log msg via the context."""
        self.ctxt.log(msg)

    def raw_invoke(self, gccinv, sourcefile):
        """
        Run the tool, with a timeout, returning an Analysis instance.
        May well raise an exception if something major went wrong.
        """
        raise NotImplementedError

    def checked_invoke(self, gccinv, sourcefile):
        """
        Call "raw_invoke", handling exceptions.  Return an Analysis
        instance.

        A TimeoutExpired becomes a 'timeout' Failure.  Any other
        exception becomes an 'exception' Failure carrying the traceback
        as a custom field, unless the context has capture_exceptions
        disabled, in which case it is re-raised.
        """
        try:
            self.log('about to invoke: %s with %r' % (self.name, gccinv))
            analysis = self.raw_invoke(gccinv, sourcefile)
        except TimeoutExpired:
            analysis = self._make_failed_analysis(
                sourcefile, None, msgtext='Timeout running %s' % self.name,
                failureid='timeout')
        except Exception as exc:
            # Capture the exception as a Failure instance.
            # Alternatively when debugging such failures, it can
            # be easier to re-raise the exception:
            if not self.ctxt.capture_exceptions:
                raise
            analysis = self._make_failed_analysis(
                sourcefile, None,
                msgtext=('Exception running %s: %s'
                         % (self.name, exc)),
                failureid='exception')
            analysis.set_custom_field('traceback', traceback.format_exc())
        if sourcefile:
            # Ensure the metadata names the source file, whatever the
            # tool-specific code did or did not fill in.
            if not analysis.metadata.file_:
                analysis.metadata.file_ = make_file(givenpath=sourcefile)
            analysis.metadata.file_.givenpath = sourcefile
            analysis.metadata.file_.abspath = os.path.join(os.getcwd(),
                                                           sourcefile)
        return analysis

    def handle_output(self, result):
        """
        Given a SubprocessResult, return an Analysis instance.
        """
        raise NotImplementedError

    def _make_failed_analysis(self, sourcefile, t, msgtext, failureid):
        """
        Something went wrong; build a failure report.

        sourcefile: path of the file being checked.
        t: a Timer to record wallclock stats from, or None.
        msgtext: human-readable description of the failure.
        failureid: the failureid for the Failure result.

        Returns an Analysis containing a single Failure.
        """
        generator = Generator(name=self.name,
                              version=None)
        stats = make_stats(t) if t is not None else None
        metadata = Metadata(generator=generator,
                            sut=None,
                            file_=make_file(sourcefile),
                            stats=stats)
        # A separate File instance from the metadata's, so that later
        # edits to metadata.file_ do not alter the failure's location.
        location = Location(file=make_file(sourcefile),
                            function=None,
                            point=None,
                            range_=None)
        results = [Failure(failureid=failureid,
                           location=location,
                           message=Message(msgtext),
                           customfields=None)]
        return Analysis(metadata, results)

    def _wait_for_subprocess(self, argv, env):
        """
        Spawn argv and wait up to self.timeout seconds for it to exit.

        Returns a (returncode, out, err) tuple.  If the timeout expires
        the child is killed and reaped before TimeoutExpired is
        re-raised: communicate() does not do that by itself, so the
        child would otherwise be left running.
        """
        p = Popen(argv, stdout=PIPE, stderr=PIPE, env=env)
        try:
            out, err = p.communicate(timeout=self.timeout)
        except TimeoutExpired:
            p.kill()
            p.communicate()
            raise
        return p.returncode, out, err

    def _run_subprocess(self, sourcefile, argv, env=None):
        """
        Support for running the bulk of the side effect in a subprocess,
        with timeout support.

        Returns the Analysis produced by handle_output, or a 'timeout'
        failure Analysis (with 'timeout' and 'command-line' custom
        fields) if the subprocess did not finish in time.
        """
        self.log('%s: _run_subprocess(%r, %r)' % (self.name, sourcefile, argv))
        t = Timer()
        try:
            returncode, out, err = self._wait_for_subprocess(argv, env)
            self.ctxt.write_streams(argv[0], out, err)
            result = SubprocessResult(sourcefile, argv, returncode,
                                      out, err, t)
            return self.handle_output(result)
        except TimeoutExpired:
            analysis = self._make_failed_analysis(
                sourcefile, t,
                msgtext='Timeout running %s' % self.name,
                failureid='timeout')
            analysis.set_custom_field('timeout', self.timeout)
            analysis.set_custom_field('command-line', ' '.join(argv))
            return analysis

    def run_subprocess(self, sourcefile, argv, env=None):
        """
        Support for running the bulk of the side effect in a subprocess,
        with timeout support.

        Returns a SubprocessResult.  Raises TimeoutExpired (after
        killing the child) if the subprocess does not finish in time.
        """
        self.log('%s: run_subprocess(%r, %r)' % (self.name, sourcefile, argv))
        t = Timer()
        returncode, out, err = self._wait_for_subprocess(argv, env)
        self.ctxt.write_streams(argv[0], out, err)
        return SubprocessResult(sourcefile, argv, returncode, out, err, t)

############################################################################
# Test suite
############################################################################


class CheckerTests(unittest.TestCase):
    """Shared helpers for per-tool test cases."""

    def make_ctxt(self, name, capture_exceptions=False):
        """Make a non-logging Context for use in tests."""
        return Context(name, enable_logging=False,
                       capture_exceptions=capture_exceptions)

    def make_tool_from_class(self, tool_class):
        """Instantiate tool_class with a fresh test Context."""
        ctxt = self.make_ctxt(tool_class.name)
        return tool_class(ctxt)

    def make_tool(self):
        """Hook for self.make_compiler()"""
        raise NotImplementedError

    def verify_basic_metadata(self, analysis, sourcefile):
        """Hook for self.invoke()"""
        raise NotImplementedError

    def invoke(self, sourcefile, extraargs=None):
        """Invoke a tool and sanity-check the result"""
        tool = self.make_tool()
        argv = ['gcc', '-c', sourcefile]
        if extraargs:
            argv += extraargs
        gccinv = GccInvocation(argv)
        analysis = tool.checked_invoke(gccinv, sourcefile)

        # Call a subclass hook to check basic metadata:
        self.verify_basic_metadata(analysis, sourcefile)

        # Verify that we can serialize to XML:
        xml_bytes = analysis.to_xml_bytes()
        self.assertTrue(xml_bytes.startswith(b'<analysis>'))

        # Verify it can roundtrip through JSON (equality of the result
        # is not checked, only that the roundtrip does not raise):
        js_str = json.dumps(analysis.to_json(), indent=1)
        Analysis.from_json(json.loads(js_str))

        return analysis

    def assert_metadata(self, analysis,
                        expected_generator_name, expected_given_path):
        """Check the generator name and file paths in the metadata."""
        self.assertEqual(analysis.metadata.generator.name,
                         expected_generator_name)
        self.assertEqual(analysis.metadata.file_.givenpath,
                         expected_given_path)
        self.assertIn(expected_given_path, analysis.metadata.file_.abspath)

    def assert_has_custom_field(self, analysis, name):
        """Check that analysis has a custom field called name."""
        self.assertTrue(analysis.customfields)
        self.assertIn(name, analysis.customfields)


class BuggyCheckerTests(CheckerTests):
    """Tests of how checked_invoke reports an exception from a tool."""

    def make_tool(self):
        """
        Override base class impl, so that we can enable
        exception-capture (and provide a custom tool)
        """
        class BuggyChecker(Checker):
            def raw_invoke(self, gccinv, sourcefile):
                raise ValueError('test of raising an exception')

        ctxt = self.make_ctxt('buggy', capture_exceptions=True)
        return BuggyChecker(ctxt)

    def verify_basic_metadata(self, analysis, sourcefile):
        self.assert_metadata(analysis, 'buggy', sourcefile)

    def test_exception_handling(self):
        analysis = self.invoke('test-sources/harmless.c')
        self.assertEqual(len(analysis.results), 1)
        r0 = analysis.results[0]
        self.assertIsInstance(r0, Failure)
        self.assertEqual(r0.failureid, 'exception')
        self.assertEqual(r0.message.text,
                         ('Exception running buggy:'
                          ' test of raising an exception'))
        self.assertTrue(analysis.customfields['traceback'].startswith(
            'Traceback (most recent call last):\n'))

############################################################################
# Entrypoint
############################################################################


def tool_main(argv, tool_class):
    """
    Entrypoint for use by the various per-tool scripts

    argv: the full command line, GCC-style, with argv[0] the script.
    tool_class: the Checker subclass to run; must have a "name"
    attribute.

    Writes the resulting analysis to stdout as Firehose JSON.
    """
    # If we're invoked with "unittest" as the first param,
    # run the unit test suite:
    if len(argv) >= 2:
        if argv[1] == 'unittest':
            sys.argv = [argv[0]] + argv[2:]
            return unittest.main()

    ctxt = Context(tool_class.name, enable_logging=False)
    tool = tool_class(ctxt)

    ctxt.log('argv: %r' % argv)

    gccinv = GccInvocation(argv)
    ctxt.log('  gccinv.sources: %r' % gccinv.sources)
    assert len(gccinv.sources) == 1, \
        'expected exactly one source file, got %r' % (gccinv.sources,)
    sourcefile = gccinv.sources[0]
    ctxt.log('  sourcefile: %r' % sourcefile)
    analysis = tool.checked_invoke(gccinv, sourcefile)
    json.dump(analysis.to_json(), sys.stdout, indent=1)

# Run the unit tests

if __name__ == '__main__':
    sys.exit(unittest.main())

# Trailer of the original patch email:
# --
# 1.8.5.3