akhuang updated this revision to Diff 192499.
akhuang marked 3 inline comments as done.
akhuang added a comment.

Change `mkstemp` to `NamedTemporaryFile` and add `decode('utf-8')` so that the
script works on Python 3.5.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59725/new/

https://reviews.llvm.org/D59725

Files:
  clang/utils/creduce-clang-crash.py

Index: clang/utils/creduce-clang-crash.py
===================================================================
--- clang/utils/creduce-clang-crash.py
+++ clang/utils/creduce-clang-crash.py
@@ -1,8 +1,14 @@
 #!/usr/bin/env python
 """Calls C-Reduce to create a minimal reproducer for clang crashes.
+
+Output files:
+  *.reduced.sh -- crash reproducer with minimal arguments
+  *.reduced.cpp -- the reduced file
+  *.test.sh -- interestingness test for C-Reduce
 """
 
-from argparse import ArgumentParser
+from __future__ import print_function
+from argparse import ArgumentParser, RawTextHelpFormatter
 import os
 import re
 import stat
@@ -15,10 +21,14 @@
 from distutils.spawn import find_executable
 
 verbose = False
-llvm_bin = None
 creduce_cmd = None
+clang_cmd = None
 not_cmd = None
 
+def verbose_print(*args, **kwargs):
+  """Forward all arguments to print(), but only when `verbose` is set."""
+  if not verbose:
+    return
+  print(*args, **kwargs)
+
 def check_file(fname):
   if not os.path.isfile(fname):
     sys.exit("ERROR: %s does not exist" % (fname))
@@ -33,166 +43,337 @@
     cmd = find_executable(cmd_path)
     if cmd:
       return cmd
-    sys.exit("ERROR: executable %s not found" % (cmd_path))
+    sys.exit("ERROR: executable `%s` not found" % (cmd_path))
 
   cmd = find_executable(cmd_name, path=cmd_dir)
   if cmd:
     return cmd
-  sys.exit("ERROR: %s not found in %s" % (cmd_name, cmd_dir))
 
-def quote_cmd(cmd):
-  return ' '.join(arg if arg.startswith('$') else pipes.quote(arg)
-                  for arg in cmd)
-
-def get_crash_cmd(crash_script):
-  with open(crash_script) as f:
-    # Assume clang call is on the last line of the script
-    line = f.readlines()[-1]
-    cmd = shlex.split(line)
-
-    # Overwrite the script's clang with the user's clang path
-    new_clang = check_cmd('clang', llvm_bin)
-    cmd[0] = pipes.quote(new_clang)
-    return cmd
+  if not cmd_dir:
+    cmd_dir = "$PATH"
+  sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))
 
-def has_expected_output(crash_cmd, expected_output):
-  p = subprocess.Popen(crash_cmd,
-                       stdout=subprocess.PIPE,
-                       stderr=subprocess.STDOUT)
-  crash_output, _ = p.communicate()
-  return all(msg in crash_output for msg in expected_output)
-
-def get_expected_output(crash_cmd):
-  p = subprocess.Popen(crash_cmd,
-                       stdout=subprocess.PIPE,
-                       stderr=subprocess.STDOUT)
-  crash_output, _ = p.communicate()
-
-  # If there is an assertion failure, use that;
-  # otherwise use the last five stack trace functions
-  assertion_re = r'Assertion `([^\']+)\' failed'
-  assertion_match = re.search(assertion_re, crash_output)
-  if assertion_match:
-    return [assertion_match.group(1)]
-  else:
-    stacktrace_re = r'#[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
-    matches = re.findall(stacktrace_re, crash_output)
-    return matches[-5:]
-
-def write_interestingness_test(testfile, crash_cmd, expected_output,
-                               file_to_reduce):
-  filename = os.path.basename(file_to_reduce)
-  if filename not in crash_cmd:
-    sys.exit("ERROR: expected %s to be in the crash command" % filename)
-
-  # Replace all instances of file_to_reduce with a command line variable
-  output = ['#!/bin/bash',
-            'if [ -z "$1" ] ; then',
-            '  f=%s' % (pipes.quote(filename)),
-            'else',
-            '  f="$1"',
-            'fi']
-  cmd = ['$f' if s == filename else s for s in crash_cmd]
-
-  output.append('%s --crash %s >& t.log || exit 1' % (pipes.quote(not_cmd),
-                                                      quote_cmd(cmd)))
-
-  for msg in expected_output:
-    output.append('grep %s t.log || exit 1' % pipes.quote(msg))
-
-  with open(testfile, 'w') as f:
-    f.write('\n'.join(output))
-  os.chmod(testfile, os.stat(testfile).st_mode | stat.S_IEXEC)
-
-def check_interestingness(testfile, file_to_reduce):
-  testfile = os.path.abspath(testfile)
-
-  # Check that the test considers the original file interesting
-  with open(os.devnull, 'w') as devnull:
-    returncode = subprocess.call(testfile, stdout=devnull)
-  if returncode:
-    sys.exit("The interestingness test does not pass for the original file.")
-
-  # Check that an empty file is not interesting
-  _, empty_file = tempfile.mkstemp()
-  with open(os.devnull, 'w') as devnull:
-    returncode = subprocess.call([testfile, empty_file], stdout=devnull)
-  os.remove(empty_file)
-  if not returncode:
-    sys.exit("The interestingness test passes for an empty file.")
-
-def clang_preprocess(file_to_reduce, crash_cmd, expected_output):
-  _, tmpfile = tempfile.mkstemp()
-  shutil.copy(file_to_reduce, tmpfile)
-
-  cmd = crash_cmd + ['-E', '-P']
-  p = subprocess.Popen(cmd,
-                       stdout=subprocess.PIPE,
-                       stderr=subprocess.STDOUT)
-  preprocessed, _ = p.communicate()
-
-  with open(file_to_reduce, 'w') as f:
-    f.write(preprocessed)
-
-  if has_expected_output(crash_cmd, expected_output):
-    if verbose:
-      print("Successfuly preprocessed with %s" % (quote_cmd(cmd)))
-    os.remove(tmpfile)
-  else:
-    if verbose:
-      print("Failed to preprocess with %s" % (quote_cmd(cmd)))
-    shutil.move(tmpfile, file_to_reduce)
-
-
-def filter_args(args, opts_startswith=[]):
-  result = [arg for arg in args if all(not arg.startswith(a) for a in
-                                       opts_startswith)]
-  return result
-
-def try_remove_args(cmd, expected_output, msg=None, extra_arg=None, **kwargs):
-  new_cmd = filter_args(cmd, **kwargs)
-  if extra_arg and extra_arg not in new_cmd:
-    new_cmd.append(extra_arg)
-  if new_cmd != cmd and has_expected_output(new_cmd, expected_output):
-    if msg and verbose:
-      print(msg)
-    return new_cmd
-  return cmd
-
-def simplify_crash_cmd(crash_cmd, expected_output):
-  new_cmd = try_remove_args(crash_cmd, expected_output,
-                            msg="Removed debug info options",
-                            opts_startswith=["-gcodeview",
-                                             "-dwarf-column-info",
-                                             "-debug-info-kind=",
-                                             "-debugger-tuning=",
-                                             "-gdwarf"])
-  new_cmd = try_remove_args(new_cmd, expected_output,
-                            msg="Replaced -W options with -w",
-                            extra_arg='-w',
-                            opts_startswith=["-W"])
-  new_cmd = try_remove_args(new_cmd, expected_output,
-                            msg="Replaced optimization level with -O0",
-                            extra_arg="-O0",
-                            opts_startswith=["-O"])
-  return new_cmd
+def quote_cmd(cmd):
+  """Return the argument list `cmd` as one shell-quoted command string."""
+  quoted_args = [pipes.quote(arg) for arg in cmd]
+  return ' '.join(quoted_args)
+
+def write_to_script(text, filename):
+  """Write `text` to `filename`, then add the owner-execute permission bit."""
+  with open(filename, 'w') as script:
+    script.write(text)
+  current_mode = os.stat(filename).st_mode
+  os.chmod(filename, current_mode | stat.S_IEXEC)
+
+class Reduce(object):
+  """Holds the state for reducing one clang crash.
+
+  Construction copies the file to reduce to `<name>.reduced<ext>`, reads the
+  clang arguments from the crash script and runs clang once to record the
+  output that identifies the crash. The remaining methods simplify the
+  arguments, write and check the interestingness test, preprocess the source,
+  run C-Reduce and finally minimize the clang arguments.
+  """
+
+  def __init__(self, crash_script, file_to_reduce):
+    """Derive the output file names and record the expected crash output.
+
+    crash_script -- script containing the crashing clang invocation
+    file_to_reduce -- source file that is passed to clang in that script
+    """
+    crash_script_name, crash_script_ext = os.path.splitext(crash_script)
+    file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)
+
+    self.testfile = file_reduce_name + '.test.sh'
+    self.crash_script = crash_script_name + '.reduced' + crash_script_ext
+    self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
+    # All later steps operate on this copy, not on the file that was passed in
+    shutil.copy(file_to_reduce, self.file_to_reduce)
+
+    self.clang = clang_cmd
+    self.clang_args = []
+    self.expected_output = []
+    # Cleared when the identifying message is a "fatal error:", in which case
+    # the interestingness test must not pass --crash to `not`
+    self.is_crash = True
+    self.creduce_flags = ["--tidy"]
+
+    self.read_clang_args(crash_script, file_to_reduce)
+    self.read_expected_output()
+
+  def get_crash_cmd(self, cmd=None, args=None, filename=None):
+    """Return the clang invocation as the list [cmd] + args + [filename].
+
+    Omitted parameters default to self.clang, self.clang_args and
+    self.file_to_reduce respectively.
+    """
+    if not cmd:
+      cmd = self.clang
+    # Compare against None: an empty list is a legitimate request (every
+    # argument has been removed) and must not fall back to self.clang_args
+    if args is None:
+      args = self.clang_args
+    if not filename:
+      filename = self.file_to_reduce
+
+    return [cmd] + args + [filename]
+
+  def read_clang_args(self, crash_script, filename):
+    """Set self.clang_args from the clang call found in crash_script.
+
+    The clang executable (first word) and the last occurrence of `filename`
+    are dropped; get_crash_cmd() supplies both again. Exits with an error if
+    the script contains no non-blank line.
+    """
+    print("\nReading arguments from crash script...")
+    with open(crash_script) as f:
+      # Assume clang call is on the last non-blank line of the script, so
+      # that trailing empty lines do not yield an empty command
+      lines = [line for line in f if line.strip()]
+    if not lines:
+      sys.exit("ERROR: %s does not contain a clang command" % (crash_script))
+    cmd = shlex.split(lines[-1])
+
+    # Remove clang and filename from the command
+    # Assume the last occurrence of the filename is the clang input file
+    del cmd[0]
+    for i in range(len(cmd)-1, -1, -1):
+      if cmd[i] == filename:
+        del cmd[i]
+        break
+    self.clang_args = cmd
+    verbose_print("Clang arguments:", quote_cmd(self.clang_args))
+
+  def read_expected_output(self):
+    """Run the crash command and set self.expected_output.
+
+    The result is either the first known error message found in the output
+    or, failing that, up to five stack trace function names. Exits with
+    status 1 if neither is found.
+    """
+    print("\nGetting expected crash output...")
+    p = subprocess.Popen(self.get_crash_cmd(),
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.STDOUT)
+    crash_output, _ = p.communicate()
+    result = []
+
+    # Remove color codes
+    # Undecodable bytes are replaced rather than aborting the whole script
+    ansi_escape = r'\x1b\[[0-?]*m'
+    crash_output = re.sub(ansi_escape, '',
+                          crash_output.decode('utf-8', 'replace'))
+
+    # Look for specific error messages
+    regexes = [r"Assertion `(.+)' failed", # Linux assert()
+               r"Assertion failed: (.+),", # FreeBSD/Mac assert()
+               r"fatal error: backend error: (.+)",
+               r"LLVM ERROR: (.+)",
+               r"UNREACHABLE executed (at .+)?!",
+               r"LLVM IR generation of declaration '(.+)'",
+               r"Generating code for declaration '(.+)'",
+               r"\*\*\* Bad machine code: (.+) \*\*\*"]
+    for msg_re in regexes:
+      match = re.search(msg_re, crash_output)
+      if match:
+        msg = match.group(1)
+        result = [msg]
+        print("Found message:", msg)
+
+        if "fatal error:" in msg_re:
+          self.is_crash = False
+        break
+
+    # If no message was found, use the top five stack trace functions,
+    # ignoring some common functions
+    # Five is a somewhat arbitrary number; the goal is to get a small number
+    # of identifying functions with some leeway for common functions
+    if not result:
+      stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
+      filters = ["PrintStackTraceSignalHandler",
+                 "llvm::sys::RunSignalHandlers",
+                 "SignalHandler", "__restore_rt", "gsignal", "abort"]
+      matches = re.findall(stacktrace_re, crash_output)
+      result = [x for x in matches if x and x.strip() not in filters][:5]
+      for msg in result:
+        print("Found stack trace function:", msg)
+
+    if not result:
+      print("ERROR: no crash was found")
+      print("The crash output was:\n========\n%s========" % crash_output)
+      sys.exit(1)
+
+    self.expected_output = result
+
+  def check_expected_output(self, args=None, filename=None):
+    """Return True if running clang prints every expected message.
+
+    args and filename default to self.clang_args and self.file_to_reduce.
+    Messages are matched as plain substrings of the combined stdout/stderr.
+    """
+    # An empty list means "no arguments", so only None selects the default
+    if args is None:
+      args = self.clang_args
+    if not filename:
+      filename = self.file_to_reduce
+
+    p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename),
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.STDOUT)
+    crash_output, _ = p.communicate()
+    crash_output = crash_output.decode('utf-8', 'replace')
+    return all(msg in crash_output for msg in self.expected_output)
+
+  def write_interestingness_test(self):
+    """Write the interestingness test to self.testfile and validate it.
+
+    The script runs the crash command through `not` (with --crash unless the
+    failure is a fatal error), logs the output to t.log and requires every
+    expected message to be present in the log.
+    """
+    print("\nCreating the interestingness test...")
+
+    crash_flag = "--crash" if self.is_crash else ""
+
+    output = "#!/bin/bash\n%s %s %s >& t.log || exit 1\n" % \
+        (pipes.quote(not_cmd), crash_flag, quote_cmd(self.get_crash_cmd()))
+
+    # -F: match the message literally, like check_expected_output() does;
+    # --: a message starting with '-' must not be parsed as a grep option
+    for msg in self.expected_output:
+      output += 'grep -F -- %s t.log || exit 1\n' % pipes.quote(msg)
+
+    write_to_script(output, self.testfile)
+    self.check_interestingness()
+
+  def check_interestingness(self):
+    """Exit unless the test passes for the source and fails for an empty file.
+    """
+    testfile = os.path.abspath(self.testfile)
+
+    # Check that the test considers the original file interesting
+    with open(os.devnull, 'w') as devnull:
+      returncode = subprocess.call(testfile, stdout=devnull)
+    if returncode:
+      sys.exit("The interestingness test does not pass for the original file.")
+
+    # Check that an empty file is not interesting
+    # Instead of modifying the filename in the test file, just run the command
+    with tempfile.NamedTemporaryFile() as empty_file:
+      is_interesting = self.check_expected_output(filename=empty_file.name)
+    if is_interesting:
+      sys.exit("The interestingness test passes for an empty file.")
+
+  def clang_preprocess(self):
+    """Replace self.file_to_reduce with its preprocessed form if that still
+    reproduces the crash; otherwise leave the file untouched.
+    """
+    print("\nTrying to preprocess the source file...")
+    # use delete=False in case the tmpfile flag causes problems when copying;
+    # the file is removed explicitly in the finally clause below
+    tmpfile = tempfile.NamedTemporaryFile(delete=False)
+
+    cmd = self.get_crash_cmd() + ['-E', '-P']
+    try:
+      subprocess.check_call(cmd, stdout=tmpfile)
+      if self.check_expected_output(filename=tmpfile.name):
+        print("Successfully preprocessed")
+        shutil.copy(tmpfile.name, self.file_to_reduce)
+      else:
+        print("No longer crashes after preprocessing -- using original source")
+    except subprocess.CalledProcessError:
+      print("Preprocessing failed")
+    finally:
+      # Clean up even when an unexpected exception propagates
+      tmpfile.close()
+      os.remove(tmpfile.name)
+
+  @staticmethod
+  def filter_args(args, opts_equal=(), opts_startswith=(),
+                  opts_one_arg_startswith=()):
+    """Return a copy of `args` without the selected options.
+
+    opts_equal -- drop arguments equal to one of these strings
+    opts_startswith -- drop arguments starting with one of these prefixes
+    opts_one_arg_startswith -- drop arguments starting with one of these
+      prefixes together with the argument that follows them
+    """
+    result = []
+    skip_next = False
+    for arg in args:
+      if skip_next:
+        skip_next = False
+        continue
+      if any(arg == a for a in opts_equal):
+        continue
+      if any(arg.startswith(a) for a in opts_startswith):
+        continue
+      if any(arg.startswith(a) for a in opts_one_arg_startswith):
+        skip_next = True
+        continue
+      result.append(arg)
+    return result
+
+  def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
+    """Filter `args` and keep the result only if the crash still reproduces.
+
+    kwargs are passed to filter_args(). extra_arg, if given, is placed at the
+    end of the new list (moved there if already present). Returns the new
+    list on success and the unchanged `args` otherwise; msg is printed in
+    verbose mode on success.
+    """
+    new_args = self.filter_args(args, **kwargs)
+
+    if extra_arg:
+      if extra_arg in new_args:
+        new_args.remove(extra_arg)
+      new_args.append(extra_arg)
+
+    if (new_args != args and
+        self.check_expected_output(args=new_args)):
+      if msg:
+        verbose_print(msg)
+      return new_args
+    return args
+
+  def try_remove_arg_by_index(self, args, index):
+    """Try to drop args[index] and return (resulting args, next index).
+
+    On success the index is returned unchanged, because the following
+    argument has moved into that position; on failure the original list and
+    index+1 are returned.
+    """
+    new_args = args[:index] + args[index+1:]
+    removed_arg = args[index]
+
+    # Heuristic for grouping arguments:
+    # remove next argument if it doesn't start with "-"
+    if index < len(new_args) and not new_args[index].startswith('-'):
+      del new_args[index]
+      removed_arg += ' ' + args[index+1]
+
+    if self.check_expected_output(args=new_args):
+      verbose_print("Removed", removed_arg)
+      return new_args, index
+    return args, index+1
+
+  def simplify_clang_args(self):
+    """Simplify clang arguments before running C-Reduce to reduce the time the
+    interestingness test takes to run.
+    """
+    print("\nSimplifying the clang command...")
+
+    # Remove some clang arguments to speed up the interestingness test
+    new_args = self.clang_args
+    new_args = self.try_remove_args(new_args,
+                                    msg="Removed debug info options",
+                                    opts_startswith=["-gcodeview",
+                                                     "-debug-info-kind=",
+                                                     "-debugger-tuning="])
+    # Not suppressing warnings (-w) sometimes prevents the crash from occurring
+    # after preprocessing
+    new_args = self.try_remove_args(new_args,
+                                    msg="Replaced -W options with -w",
+                                    extra_arg='-w',
+                                    opts_startswith=["-W"])
+    new_args = self.try_remove_args(new_args,
+                                    msg="Replaced optimization level with -O0",
+                                    extra_arg="-O0",
+                                    opts_startswith=["-O"])
+
+    # Try to remove compilation steps
+    new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
+                                    extra_arg="-emit-llvm")
+    new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
+                                    extra_arg="-fsyntax-only")
+
+    # Try to make implicit int an error for more sensible test output
+    new_args = self.try_remove_args(new_args, msg="Added -Werror=implicit-int",
+                                    opts_equal=["-w"],
+                                    extra_arg="-Werror=implicit-int")
+
+    self.clang_args = new_args
+    verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))
+
+  def reduce_clang_args(self):
+    """Minimize the clang arguments after running C-Reduce, to get the smallest
+    command that reproduces the crash on the reduced file.
+
+    The reduced command is written to self.crash_script and printed.
+    """
+    print("\nReducing the clang crash command...")
+
+    new_args = self.clang_args
+
+    # Remove some often occurring args
+    new_args = self.try_remove_args(new_args, msg="Removed -D options",
+                                    opts_startswith=["-D"])
+    new_args = self.try_remove_args(new_args, msg="Removed -D options",
+                                    opts_one_arg_startswith=["-D"])
+    new_args = self.try_remove_args(new_args, msg="Removed -I options",
+                                    opts_startswith=["-I"])
+    new_args = self.try_remove_args(new_args, msg="Removed -I options",
+                                    opts_one_arg_startswith=["-I"])
+    new_args = self.try_remove_args(new_args, msg="Removed -W options",
+                                    opts_startswith=["-W"])
+
+    # Remove other cases that aren't covered by the heuristic
+    new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
+                                    opts_one_arg_startswith=["-mllvm"])
+
+    # Then try every remaining argument individually; the helper returns the
+    # index to examine next
+    i = 0
+    while i < len(new_args):
+      new_args, i = self.try_remove_arg_by_index(new_args, i)
+
+    self.clang_args = new_args
+
+    reduced_cmd = quote_cmd(self.get_crash_cmd())
+    write_to_script(reduced_cmd, self.crash_script)
+    print("Reduced command:", reduced_cmd)
+
+  def run_creduce(self):
+    """Run C-Reduce with self.testfile on self.file_to_reduce.
+
+    Ctrl-C kills the C-Reduce process and returns normally, so that the
+    caller can continue with the partially reduced file.
+    """
+    print("\nRunning C-Reduce...")
+    # Bound before the try block so the handler never sees an undefined name
+    # if the interrupt arrives before Popen() returns
+    p = None
+    try:
+      p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
+                           [self.testfile, self.file_to_reduce])
+      p.communicate()
+    except KeyboardInterrupt:
+      # Hack to kill C-Reduce because it jumps into its own pgid
+      print('\n\nctrl-c detected, killed creduce')
+      if p is not None:
+        p.kill()
 
 def main():
   global verbose
-  global llvm_bin
   global creduce_cmd
+  global clang_cmd
   global not_cmd
 
-  parser = ArgumentParser(description=__doc__)
+  parser = ArgumentParser(description=__doc__,
+                          formatter_class=RawTextHelpFormatter)
   parser.add_argument('crash_script', type=str, nargs=1,
                       help="Name of the script that generates the crash.")
   parser.add_argument('file_to_reduce', type=str, nargs=1,
                       help="Name of the file to be reduced.")
   parser.add_argument('--llvm-bin', dest='llvm_bin', type=str,
-                      required=True, help="Path to the LLVM bin directory.")
+                      help="Path to the LLVM bin directory.")
   parser.add_argument('--llvm-not', dest='llvm_not', type=str,
                       help="The path to the `not` executable. "
                       "By default uses the llvm-bin directory.")
+  parser.add_argument('--clang', dest='clang', type=str,
+                      help="The path to the `clang` executable. "
+                      "By default uses the llvm-bin directory.")
   parser.add_argument('--creduce', dest='creduce', type=str,
                       help="The path to the `creduce` executable. "
                       "Required if `creduce` is not in PATH environment.")
@@ -200,41 +381,21 @@
   args = parser.parse_args()
 
   verbose = args.verbose
-  llvm_bin = os.path.abspath(args.llvm_bin)
+  llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None
   creduce_cmd = check_cmd('creduce', None, args.creduce)
+  clang_cmd = check_cmd('clang', llvm_bin, args.clang)
   not_cmd = check_cmd('not', llvm_bin, args.llvm_not)
+
   crash_script = check_file(args.crash_script[0])
   file_to_reduce = check_file(args.file_to_reduce[0])
 
-  print("\nParsing the crash script and getting expected output...")
-  crash_cmd = get_crash_cmd(crash_script)
-
-  expected_output = get_expected_output(crash_cmd)
-  if len(expected_output) < 1:
-    sys.exit("ERROR: no crash was found")
-
-  print("\nSimplifying the crash command...")
-  crash_cmd = simplify_crash_cmd(crash_cmd, expected_output)
-
-  print("\nWriting interestingness test to file...")
-  testfile = os.path.splitext(file_to_reduce)[0] + '.test.sh'
-  write_interestingness_test(testfile, crash_cmd, expected_output,
-                             file_to_reduce)
-  check_interestingness(testfile, file_to_reduce)
-
-  print("\nPreprocessing the file to reduce...")
-  clang_preprocess(file_to_reduce, crash_cmd, expected_output)
-
-  print("\nRunning C-Reduce...")
-  try:
-    p = subprocess.Popen([creduce_cmd, testfile, file_to_reduce])
-    p.communicate()
-  except KeyboardInterrupt:
-    # Hack to kill C-Reduce because it jumps into its own pgid
-    print('\n\nctrl-c detected, killed creduce')
-    p.kill()
+  r = Reduce(crash_script, file_to_reduce)
 
-  # FIXME: reduce the clang crash command
+  r.simplify_clang_args()
+  r.write_interestingness_test()
+  r.clang_preprocess()
+  r.run_creduce()
+  r.reduce_clang_args()
 
 if __name__ == '__main__':
   main()
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to