akhuang created this revision.
akhuang added reviewers: rnk, george.burgess.iv.
Herald added a reviewer: serge-sans-paille.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

- Try to preprocess the file before reducing.
- Try to remove some command-line arguments.
- Now requires an LLVM bin directory, since the generated crash script doesn't
  have an absolute path for clang.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D59440

Files:
  clang/utils/creduce-clang-crash.py

Index: clang/utils/creduce-clang-crash.py
===================================================================
--- clang/utils/creduce-clang-crash.py
+++ clang/utils/creduce-clang-crash.py
@@ -1,7 +1,5 @@
 #!/usr/bin/env python
 """Calls C-Reduce to create a minimal reproducer for clang crashes.
-
-Requires C-Reduce and not (part of LLVM utils) to be installed.
 """
 
 from argparse import ArgumentParser
@@ -11,103 +9,231 @@
 import sys
 import subprocess
 import pipes
+import shlex
+import tempfile
+import shutil
 from distutils.spawn import find_executable
 
-def create_test(build_script, llvm_not):
+verbose = False
+llvm_bin = None
+creduce_cmd = None
+not_cmd = None
+
+def check_file(fname):
+  fname = os.path.abspath(fname)
+  if not os.path.isfile(fname):
+    sys.exit("ERROR: %s does not exist" % (fname))
+  return fname
+
+def check_cmd(cmd_name, cmd_dir, cmd_path=None):
   """
-  Create an interestingness test from the crash output.
-  Return as a string.
+  Returns absolute path to cmd_path if it is given,
+  or absolute path to cmd_dir/cmd_name.
   """
-  # Get clang call from build script
-  # Assumes the call is the last line of the script
-  with open(build_script) as f:
-    cmd = f.readlines()[-1].rstrip('\n\r')
-
-  # Get crash output
-  p = subprocess.Popen(build_script,
+  if cmd_path:
+    cmd = find_executable(cmd_path)
+    if cmd:
+      return cmd
+    sys.exit("ERROR: %s not found")
+
+  cmd = find_executable(cmd_name, path=cmd_dir)
+  if cmd:
+    return cmd
+  sys.exit("ERROR: %s not found in %s" % (cmd_name, cmd_dir))
+
+def quote_cmd(cmd):
+  return ' '.join(pipes.quote(s) for s in cmd)
+
+def get_crash_cmd(crash_script):
+  with open(crash_script) as f:
+    # Assume clang call is on the last line of the script
+    line = f.readlines()[-1]
+    cmd = shlex.split(line)
+
+    # Overwrite the script's clang with the user's clang path
+    clang_name = os.path.basename(cmd[0])
+    new_clang = check_cmd(clang_name, llvm_bin)
+    cmd[0] = pipes.quote(new_clang)
+    return cmd
+
+def has_expected_output(crash_cmd, expected_output):
+  p = subprocess.Popen(crash_cmd,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.STDOUT)
   crash_output, _ = p.communicate()
+  for msg in expected_output:
+    if msg not in crash_output:
+      return False
+  return True
 
-  output = ['#!/bin/bash']
-  output.append('%s --crash %s >& t.log || exit 1' % (pipes.quote(llvm_not),
-                                                      cmd))
+def get_expected_output(crash_cmd):
+  p = subprocess.Popen(crash_cmd,
+                       stdout=subprocess.PIPE,
+                       stderr=subprocess.STDOUT)
+  crash_output, _ = p.communicate()
 
-  # Add messages from crash output to the test
-  # If there is an Assertion failure, use that; otherwise use the
-  # last five stack trace functions
+  # If there is an assertion failure, use that;
+  # otherwise use the last five stack trace functions
   assertion_re = r'Assertion `([^\']+)\' failed'
   assertion_match = re.search(assertion_re, crash_output)
   if assertion_match:
-    msg = assertion_match.group(1)
-    output.append('grep %s t.log || exit 1' % pipes.quote(msg))
+    return [assertion_match.group(1)]
   else:
     stacktrace_re = r'#[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
     matches = re.findall(stacktrace_re, crash_output)
-    del matches[:-5]
-    output += ['grep %s t.log || exit 1' % pipes.quote(msg) for msg in matches]
+    return matches[-5:]
+
+def write_interestingness_test(testfile, crash_cmd, expected_output):
+  output = ['#!/bin/bash']
+  output.append('%s --crash %s >& t.log || exit 1' % (pipes.quote(not_cmd),
+                                                      quote_cmd(crash_cmd)))
+
+  for msg in expected_output:
+    output.append('grep %s t.log || exit 1' % pipes.quote(msg))
 
-  return output
+  with open(testfile, 'w') as f:
+    f.write('\n'.join(output))
+  os.chmod(testfile, os.stat(testfile).st_mode | stat.S_IEXEC)
+
+def check_interestingness(testfile, file_to_reduce):
+  # Check that the test considers the original file interesting
+  with open(os.devnull, 'w') as devnull:
+    p = subprocess.Popen(testfile, stdout=devnull)
+    p.communicate()
+  if p.returncode:
+    sys.exit("The interestingness test does not pass for the original file.")
+
+  # Check that an empty file is not interesting
+  # file_to_reduce is hardcoded into the test, so this is a roundabout
+  # way to run it on an empty file
+  _, tmpfile = tempfile.mkstemp()
+  _, empty_file = tempfile.mkstemp()
+  shutil.copy(file_to_reduce, tmpfile)
+  shutil.copy(empty_file, file_to_reduce)
+  with open(os.devnull, 'w') as devnull:
+    p = subprocess.Popen(testfile, stdout=devnull)
+    p.communicate()
+  shutil.copy(tmpfile, file_to_reduce)
+  os.remove(empty_file)
+  if not p.returncode:
+    sys.exit("The interestingness test passes for an empty file.")
+
+def clang_preprocess(file_to_reduce, crash_cmd, expected_output):
+  _, tmpfile = tempfile.mkstemp()
+  shutil.copy(file_to_reduce, tmpfile)
+
+  cmd = crash_cmd + ['-E', '-P']
+  p = subprocess.Popen(cmd,
+                       stdout=subprocess.PIPE,
+                       stderr=subprocess.STDOUT)
+  preprocessed, _ = p.communicate()
+
+  with open(file_to_reduce, 'w') as f:
+    f.write(preprocessed)
+
+  if has_expected_output(crash_cmd, expected_output):
+    if verbose:
+      print("Successfuly preprocessed with %s" % (quote_cmd(crash_cmd)))
+    os.remove(tmpfile)
+  else:
+    if verbose:
+      print("Failed to preprocess with %s" % (quote_cmd(cmd)))
+    shutil.move(tmpfile, file_to_reduce)
+
+
+def filter_args(args, noargs_opts_to_remove=[],
+                noargs_opts_to_remove_startswith=[],
+                one_arg_opts_to_remove=[]):
+  result = []
+  skip_next = False
+
+  for arg in args:
+    if skip_next:
+      skip_next = False
+      continue
+    if (any(arg == a for a in noargs_opts_to_remove) or
+        any(arg.startswith(a) for a in noargs_opts_to_remove_startswith)):
+      continue
+    if any(arg == a for a in one_arg_opts_to_remove):
+      skip_next = True
+      continue
+    result.append(arg)
+  return result
+
+def try_remove_args(cmd, expected_output, msg=None, extra_args=[], **kwargs):
+  new_cmd = filter_args(cmd, **kwargs)
+  new_cmd += extra_args
+  if has_expected_output(new_cmd, expected_output):
+    if msg and verbose:
+      print(msg)
+    return new_cmd
+  return cmd
+
+def simplify_crash_cmd(crash_cmd, expected_output):
+  new_cmd = try_remove_args(crash_cmd, expected_output,
+                            msg="Removed -gcodeview",
+                            noargs_opts_to_remove=["-gcodeview"])
+  new_cmd = try_remove_args(crash_cmd, expected_output,
+                            msg="Removed debug info options",
+                            noargs_opts_to_remove_startswith=["-debug-info-kind=",
+                                                              "-debugger-tuning="])
+  new_cmd = try_remove_args(crash_cmd, expected_output,
+                            msg="Removed -W options and replaced with -w",
+                            extra_args=['-w'],
+                            noargs_opts_to_remove=['-w'],
+                            noargs_opts_to_remove_startswith=["-W"])
+  #FIXME: remove other args
+  return new_cmd
 
 def main():
+  global verbose
+  global llvm_bin
+  global creduce_cmd
+  global not_cmd
+
   parser = ArgumentParser(description=__doc__)
-  parser.add_argument('build_script', type=str, nargs=1,
-                      help='Name of the script that generates the crash.')
+  parser.add_argument('crash_script', type=str, nargs=1,
+                      help="Name of the script that generates the crash.")
   parser.add_argument('file_to_reduce', type=str, nargs=1,
-                      help='Name of the file to be reduced.')
-  parser.add_argument('-o', '--output', dest='output', type=str,
-                      help='Name of the output file for the reduction. Optional.')
+                      help="Name of the file to be reduced.")
+  parser.add_argument('--llvm-bin', dest='llvm_bin', type=str,
+                      required=True, help="Path to the LLVM bin directory.")
   parser.add_argument('--llvm-not', dest='llvm_not', type=str,
-                      help="The path to the llvm-not executable. "
-                      "Required if 'not' is not in PATH environment.");
+                      help="The path to the `not` executable. "
+                      "By default uses the llvm-bin directory.")
   parser.add_argument('--creduce', dest='creduce', type=str,
-                      help="The path to the C-Reduce executable. "
-                      "Required if 'creduce' is not in PATH environment.");
+                      help="The path to the `creduce` executable. "
+                      "Required if `creduce` is not in PATH environment.")
+  parser.add_argument('-v', '--verbose', action='store_true')
   args = parser.parse_args()
 
-  build_script = os.path.abspath(args.build_script[0])
-  file_to_reduce = os.path.abspath(args.file_to_reduce[0])
-  llvm_not = (find_executable(args.llvm_not) if args.llvm_not else
-              find_executable('not'))
-  creduce = (find_executable(args.creduce) if args.creduce else
-             find_executable('creduce'))
-
-  if not os.path.isfile(build_script):
-    print(("ERROR: input file '%s' does not exist") % build_script)
-    return 1
-
-  if not os.path.isfile(file_to_reduce):
-    print(("ERROR: input file '%s' does not exist") % file_to_reduce)
-    return 1
-
-  if not llvm_not:
-    parser.print_help()
-    return 1
-
-  if not creduce:
-    parser.print_help()
-    return 1
-
-  # Write interestingness test to file
-  test_contents = create_test(build_script, llvm_not)
-  testname, _ = os.path.splitext(file_to_reduce)
-  testfile = testname + '.test.sh'
-  with open(testfile, 'w') as f:
-    f.write('\n'.join(test_contents))
-  os.chmod(testfile, os.stat(testfile).st_mode | stat.S_IEXEC)
+  verbose = args.verbose
+  llvm_bin = os.path.abspath(args.llvm_bin)
+  creduce_cmd = check_cmd('creduce', None, args.creduce)
+  not_cmd = check_cmd('not', llvm_bin, args.llvm_not)
+  crash_script = check_file(args.crash_script[0])
+  file_to_reduce = check_file(args.file_to_reduce[0])
 
-  # Confirm that the interestingness test passes
-  try:
-    with open(os.devnull, 'w') as devnull:
-      subprocess.check_call(testfile, stdout=devnull)
-  except subprocess.CalledProcessError:
-    print("For some reason the interestingness test does not return zero")
-    return 1
+  print("\nParsing the crash script and getting expected output...")
+  crash_cmd = get_crash_cmd(crash_script)
+  expected_output = get_expected_output(crash_cmd)
+  if len(expected_output) < 1:
+    sys.exit("ERROR: no crash was found")
+
+  print("\nSimplifying the crash command...")
+  crash_cmd = simplify_crash_cmd(crash_cmd, expected_output)
+
+  print("\nWriting interestingness test to file...")
+  testfile = os.path.splitext(file_to_reduce)[0] + '.test.sh'
+  write_interestingness_test(testfile, crash_cmd, expected_output)
+  check_interestingness(testfile, file_to_reduce)
 
-  # FIXME: try running clang preprocessor first
+  print("\nPreprocessing the file to reduce...")
+  clang_preprocess(file_to_reduce, crash_cmd, expected_output)
 
+  print("\nRunning C-Reduce...")
   try:
-    p = subprocess.Popen([creduce, testfile, file_to_reduce])
+    p = subprocess.Popen([creduce_cmd, testfile, file_to_reduce])
     p.communicate()
   except KeyboardInterrupt:
     # Hack to kill C-Reduce because it jumps into its own pgid
@@ -115,4 +241,4 @@
     p.kill()
 
 if __name__ == '__main__':
-  sys.exit(main())
+  main()
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to