paulkirth updated this revision to Diff 220239.
paulkirth added a comment.

Addresses problems running the standalone tool w/ the libTooling executors.

When using the CodeGenAction and setting LLVM backend options, I found several 
places where data races occurred. This seems like a more significant 
architectural issue than mitigating access to a few global variables. To avoid 
these issues I've locked the executor concurrency to 1, i.e. single threaded. 
This prevents any data races when the executor is configuring the backends for 
each compiler invocation.

I've included a python script based on the run-clang-tidy.py that tidy uses. 
This allows the standalone tool to take advantage of parallelism without 
running into the data races between threads.

I have also added a new checking mechanism to ensure that PGO profiles and the 
command line options are compatible.

Lastly, I've included new documentation and tests for the standalone tool.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D67253/new/

https://reviews.llvm.org/D67253

Files:
  clang-tools-extra/CMakeLists.txt
  clang-tools-extra/clang-misexpect/CMakeLists.txt
  clang-tools-extra/clang-misexpect/ClangMisExpect.cpp
  clang-tools-extra/clang-misexpect/ClangMisExpect.h
  clang-tools-extra/clang-misexpect/tool/CMakeLists.txt
  clang-tools-extra/clang-misexpect/tool/ClangMisExpectMain.cpp
  clang-tools-extra/clang-misexpect/tool/run-clang-misexpect.py
  clang-tools-extra/docs/clang-misexpect.rst
  clang-tools-extra/test/CMakeLists.txt
  clang-tools-extra/test/clang-misexpect/Inputs/basic.proftext
  clang-tools-extra/test/clang-misexpect/Inputs/clean.c
  clang-tools-extra/test/clang-misexpect/basic.cpp
  clang-tools-extra/test/lit.cfg.py
  clang/cmake/caches/Fuchsia-stage2.cmake
  clang/test/Profile/misexpect-switch-nonconst.c
  llvm/lib/Transforms/Utils/MisExpect.cpp

Index: llvm/lib/Transforms/Utils/MisExpect.cpp
===================================================================
--- llvm/lib/Transforms/Utils/MisExpect.cpp
+++ llvm/lib/Transforms/Utils/MisExpect.cpp
@@ -43,7 +43,8 @@
 static cl::opt<bool> PGOWarnMisExpect(
     "pgo-warn-misexpect", cl::init(false), cl::Hidden,
     cl::desc("Use this option to turn on/off "
-             "warnings about incorrect usage of llvm.expect intrinsics."));
+             "warnings about incorrect usage of llvm.expect intrinsics."),
+    cl::ZeroOrMore);
 
 } // namespace llvm
 
Index: clang/test/Profile/misexpect-switch-nonconst.c
===================================================================
--- clang/test/Profile/misexpect-switch-nonconst.c
+++ clang/test/Profile/misexpect-switch-nonconst.c
@@ -1,7 +1,7 @@
 // Test that misexpect emits no warning when switch condition is non-const
 
 // RUN: llvm-profdata merge %S/Inputs/misexpect-switch-nonconst.proftext -o %t.profdata
-// RUN: %clang_cc1 %s -O2 -o - -disable-llvm-passes -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify
+// RUN: %clang_cc1 %s -O2 -o - -disable-llvm-passes -emit-llvm -fprofile-instrument-use-path=%t.profdata -verify -Wmisexpect
 
 // expected-no-diagnostics
 int sum(int *buff, int size);
Index: clang/cmake/caches/Fuchsia-stage2.cmake
===================================================================
--- clang/cmake/caches/Fuchsia-stage2.cmake
+++ clang/cmake/caches/Fuchsia-stage2.cmake
@@ -207,6 +207,7 @@
   LTO
   clang-apply-replacements
   clang-doc
+  clang-misexpect
   clang-format
   clang-resource-headers
   clang-include-fixer
Index: clang-tools-extra/test/lit.cfg.py
===================================================================
--- clang-tools-extra/test/lit.cfg.py
+++ clang-tools-extra/test/lit.cfg.py
@@ -144,6 +144,12 @@
     ('%run_clang_tidy',
      '%s %s' % (python_exec, run_clang_tidy)) )
 
+run_clang_misexpect = os.path.join(
+    config.test_source_root, "..", "clang-misexpect", "tool", "run-clang-misexpect.py")
+config.substitutions.append(
+    ('%run_clang_misexpect',
+     '%s %s' % (python_exec, run_clang_misexpect)) )
+
 clangd_benchmarks_dir = os.path.join(os.path.dirname(config.clang_tools_dir),
                                      "tools", "clang", "tools", "extra",
                                      "clangd", "benchmarks")
Index: clang-tools-extra/test/clang-misexpect/basic.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/test/clang-misexpect/basic.cpp
@@ -0,0 +1,41 @@
+// Test that clang-misexpect can run over a compilation database
+
+// Create a directory with a compilation database file and run the tool over
+// it, both directly and through the run-clang-misexpect.py wrapper:
+//
+// RUN: mkdir -p %T/misexpect/
+// RUN: echo '[{"directory": "%/T/misexpect/","arguments": ["clang++","-O2","-c","%/T/misexpect/basic.cpp"], "file": "basic.cpp"},{"directory": "%/T/misexpect/","command": "clang -c %/T/misexpect/clean.c", "file": "%/T/misexpect/clean.c"}]' > %T/misexpect/compile_commands.json
+// RUN: cat %s > %T/misexpect/basic.cpp
+// RUN: cp %S/Inputs/clean.c %T/misexpect/clean.c
+// RUN: llvm-profdata merge %S/Inputs/basic.proftext -o %t.profdata
+// RUN: clang-misexpect --profile-dir=%t.profdata -p=%T/misexpect %T/misexpect/basic.cpp -profile-format=clang
+// RUN: not clang-misexpect --profile-dir=%t.profdata -p=%T/misexpect %T/misexpect/basic.cpp -profile-format=llvm
+// RUN: %run_clang_misexpect -profile-path=%t.profdata -p=%T/misexpect %T/misexpect/basic.cpp -profile-format=clang
+// RUN: not %run_clang_misexpect -profile-path=%t.profdata -p=%T/misexpect %T/misexpect/basic.cpp -profile-format=sample
+
+// CHECK: basic.cpp:35 warning: Potential performance regression from use of __builtin_expect(): Annotation was correct on {{.+}}% ({{[0-9]+ / [0-9]+}}) of profiled executions.
+// CHECK-NEXT: basic.cpp:35 remark: Potential performance regression from use of __builtin_expect(): Annotation was correct on {{.+}}% ({{[0-9]+ / [0-9]+}}) of profiled executions.
+
+// CHECK-NOT: clean.c:13 warning: Potential performance regression from use of __builtin_expect(): Annotation was correct on {{.+}}% ({{[0-9]+ / [0-9]+}}) of profiled executions.
+// CHECK-NOT: clean.c:13 remark: Potential performance regression from use of __builtin_expect(): Annotation was correct on {{.+}}% ({{[0-9]+ / [0-9]+}}) of profiled executions.
+
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
+int foo(int);
+int baz(int);
+int buzz();
+
+const int inner_loop = 100;
+const int outer_loop = 2000;
+
+int bar() {
+  int rando = buzz();
+  int x = 0;
+  if (likely(rando % (outer_loop * inner_loop) == 0)) {
+    x = baz(rando);
+  } else {
+    x = foo(50);
+  }
+  return x;
+}
Index: clang-tools-extra/test/clang-misexpect/Inputs/clean.c
===================================================================
--- /dev/null
+++ clang-tools-extra/test/clang-misexpect/Inputs/clean.c
@@ -0,0 +1,19 @@
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+int foo(int);
+int baz(int);
+int buzz();
+
+const int inner_loop = 100;
+const int outer_loop = 2000;
+
+int bar() {
+  int rando = buzz();
+  int x = 0;
+  if (unlikely(rando % (outer_loop * inner_loop) == 0)) {
+    x = baz(rando);
+  } else {
+    x = foo(50);
+  }
+  return x;
+}
Index: clang-tools-extra/test/clang-misexpect/Inputs/basic.proftext
===================================================================
--- /dev/null
+++ clang-tools-extra/test/clang-misexpect/Inputs/basic.proftext
@@ -0,0 +1,8 @@
+bar
+# Func Hash:
+45795613684824
+# Num Counters:
+2
+# Counter Values:
+200000
+0
Index: clang-tools-extra/test/CMakeLists.txt
===================================================================
--- clang-tools-extra/test/CMakeLists.txt
+++ clang-tools-extra/test/CMakeLists.txt
@@ -47,6 +47,7 @@
   clang-apply-replacements
   clang-change-namespace
   clang-doc
+  clang-misexpect
   clang-include-fixer
   clang-move
   clang-query
Index: clang-tools-extra/docs/clang-misexpect.rst
===================================================================
--- /dev/null
+++ clang-tools-extra/docs/clang-misexpect.rst
@@ -0,0 +1,215 @@
+===================
+Clang-Misexpect
+===================
+
+.. contents::
+
+.. toctree::
+   :maxdepth: 1
+
+A standalone tool for verifying the accuracy of ``__builtin_expect()`` annotations
+---------------------------------------------------------------------------------
+
+:program:`clang-misexpect` is a standalone tool built on top of 
+:program:`clang`'s `LibTooling` infrastructure.
+
+
+The tool is in a very early development stage, so you might encounter bugs and
+crashes. Submitting reports with information about how to reproduce the issue
+to `the LLVM bugtracker <https://llvm.org/bugs>`_ will definitely help the
+project. If you have any ideas or suggestions, please put a feature request
+there.
+
+At its core, it is a simple wrapper around the compiler that enables a
+user to perform misexpect verification across an entire project through
+use of a ``compile_commands.json``. If you have a suitable LLVM PGO profile,
+and a compile commands database, then you can use the clang-misexpect
+tool to find places in your codebase that may have problematic uses of
+the ``__builtin_expect()`` annotations.
+
+:program:`clang-misexpect` automatically curates compiler flags found in the
+compilation database when running the standalone tool. This allows us to
+avoid issues for incompatible options, or when the compilation database
+contains flags for incompatible types of profiling. Prior to each
+compiler invocation we remove any conflicting flags and set the
+appropriate options to generate misexpect diagnostics. We also disable
+code generation, so only the minimal amount of the LLVM backend is used
+to issue our diagnostics.
+
+Running clang-misexpect
+-----------------------
+
+Running over a compilation database
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+By default clang-misexpect will run over an entire compilation database.
+This can be achieved in one of two ways.
+
+The first, and most straightforward method is to run the tool directly,
+which will run a portion of the compiler over each command in the
+compilation database, and issue any warnings when the
+``__builtin_expect()`` annotations mismatch with the recorded profiling
+counters.
+
+
+.. code-block:: console
+
+    $ clang-misexpect -profile-dir=/path/to/profile.profdata -p=/path/to/compdb -profile-format=clang
+
+clang-misexpect does its checks in the LLVM backend, which is a unique
+case for `LibTooling` based projects. The backend is not 100% safe to use
+in a multithreaded manner yet, so we lock the concurrency of
+clang-misexpect to run single threaded.
+
+To avoid this penalty, we provide a python wrapper that can use the
+clang-misexpect tool directly on each entry in the compilation database.
+
+This allows us to make use of parallel processing and speed up the
+checks for large projects. The downside is that the diagnostics are no
+longer formatted as nicely as when using the tool directly.
+
+Once all concurrency issues are addressed, we plan to remove the
+concurrency limitations, though the concurrency level will still be
+configurable from the command line.
+
+::
+
+    run-clang-misexpect.py $PWD -profile-path=$PWD/profile.profdata -clang-misexpect-binary=/path/to/clang-misexpect -profile-format=clang
+
+Running on a single file
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+It is also possible to run :program:`clang-misexpect` on a single file. This is
+achievable by changing the `LibTooling` executor to a standalone executor
+
+::
+
+    $ clang-misexpect -profile-dir=/path/to/profile.profdata -p=/path/to/compdb -profile-format=clang source.cpp --executor=standalone
+
+Usage
+-----
+
+:program:`clang-misexpect` is designed to simplify using :program:`clang`'s
+``-Wmisexpect`` compiler flag to verify your project's use of
+``__builtin_expect()`` annotations.
+
+.. code-block:: console
+
+    $ clang-misexpect [options] <source0> [... <sourceN>]
+
+Options
+-------
+
+:program:`clang-misexpect` offers the following options:
+
+.. code-block:: console
+
+  USAGE: clang-misexpect [options] <source0> [... <sourceN>]
+
+  OPTIONS:
+
+  Generic Options:
+
+    --help                      - Display available options (--help-hidden for more)
+    --help-list                 - Display list of available options (--help-list-hidden for more)
+    --version                   - Display the version of this program
+
+  clang-misexpect options:
+
+    --extra-arg=<string>        - Additional argument to append to the compiler command line
+    --extra-arg-before=<string> - Additional argument to prepend to the compiler command line
+    -p=<string>                 - Build path
+    --profile-dir=<string>      - Specify a path to the profile data to use during validation
+    --profile-format=<value>    - Specify the format of the profile data used during validation
+      =clang                    -   Clang Instrumentation
+      =llvm                     -   IR Instrumentation
+      =csllvm                   -   Context sensitive IR Instrumentation
+      =sample                   -   Sampling Instrumentation
+    --verify=<value>            - Specify the type of profile format verification
+      =only                     -   Only checks that the profile format is compatable with the selected options
+      =full                     -   Performs full verification before running clang-misexpect checks over compile_commands.json
+      =none                     -   Skips profile format verification. Useful when running on individual files
+
+  -p <build-path> is used to read a compile command database.
+      For example, it can be a CMake build directory in which a file named
+      compile_commands.json exists (use -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
+      CMake option to get this output). When no build path is specified,
+      a search for compile_commands.json will be attempted through all
+      parent paths of the first input file . See:
+      https://clang.llvm.org/docs/HowToSetupToolingForLLVM.html for an
+      example of setting up Clang Tooling on a source tree.
+
+  <source0> ... specify the paths of source files. These paths are
+      looked up in the compile command database. If the path of a file is
+      absolute, it needs to point into CMake's source tree. If the path is
+      relative, the current working directory needs to be in the CMake
+      source tree and the file must be in a subdirectory of the current
+      working directory. "./" prefixes in the relative files will be
+      automatically removed, but the rest of a relative path must be a
+      suffix of a path in the compile command database.
+
+
+Important Options
+-----------------
+
+-p
+~~
+
+Tells clang-misexpect where to find the compilation database.
+
+--profile-dir
+~~~~~~~~~~~~~
+
+Provides a path to the PGO profile.
+
+--profile-format
+~~~~~~~~~~~~~~~~
+
+LLVM supports 4 types of profile formats: Frontend, IR, CS-IR, and
+Sampling.
+
++----------------+--------------------------------------------------------------------------------------+
+| Profile Type   | Description                                                                          |
++================+======================================================================================+
+| Frontend       | Profiling instrumentation added during compilation by the frontend, i.e. ``clang``   |
++----------------+--------------------------------------------------------------------------------------+
+| IR             | Profiling instrumentation added by the LLVM backend                                  |
++----------------+--------------------------------------------------------------------------------------+
+| CS-IR          | Context Sensitive IR based profiles                                                  |
++----------------+--------------------------------------------------------------------------------------+
+| Sampling       | Profiles collected through sampling with external tools, such as ``perf`` on Linux   |
++----------------+--------------------------------------------------------------------------------------+
+
+:program:`clang-misexpect` is compatible with all Profiling formats, but the
+profiling type must be given to the tool, so that the correct
+compilation options can be set.
+
+Background on MisExpect Diagnostics
+-----------------------------------
+
+MisExpect checks in the LLVM backend follow a simple procedure: if the
+profiling counter associated with an ``llvm.expect`` instruction was too
+low along the expected path, then emit a diagnostic message to the
+user.
+
+The most natural place to perform the verification is just prior to when
+branch weights are assigned to the target instruction in the form of
+branch weight metadata.
+
+There are 3 key places in the LLVM backend where branch weights are
+created and assigned based on profiling information or the use of the
+``llvm.expect`` intrinsic, and our implementation focuses on these
+places to perform the verification.
+
+We calculate the threshold for emitting misexpect related diagnostics
+based on the values the compiler assigns to ``llvm.expect`` intrinsics,
+which can be set through the ``-likely-branch-weight`` and
+``-unlikely-branch-weight`` LLVM options. During verification, if the
+profile count is less than the calculated threshold, then we will emit a
+remark or warning detailing a potential performance regression. The
+diagnostic also reports the percentage of the time the annotation was
+correct during profiling to help developers reason about how to proceed.
+
+The diagnostics are also available in the form of optimization remarks,
+which can be serialized and processed through the ``opt-viewer.py``
+scripts in LLVM.
Index: clang-tools-extra/clang-misexpect/tool/run-clang-misexpect.py
===================================================================
--- /dev/null
+++ clang-tools-extra/clang-misexpect/tool/run-clang-misexpect.py
@@ -0,0 +1,216 @@
+#!/usr/bin/env python
+#
+#===- run-clang-misexpect.py - Parallel clang-misexpect ------*- python -*--===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===------------------------------------------------------------------------===#
+# FIXME: Integrate with clang-misexpect-diff.py
+
+"""
+Parallel clang-misexpect runner
+==========================
+
+Runs clang-misexpect over all files in a compilation database. Requires clang-misexpect
+in $PATH.
+
+Example invocations.
+- Run clang-misexpect on all files in the compiler database.
+    run-clang-misexpect.py $PWD -profile-path somefile.profdata
+
+- Run clang-misexpect on all files in the compiler database, using a specific clang-misexpect binary.
+    run-clang-misexpect.py $PWD -profile-path somefile.profdata -profile-format=llvm -clang-misexpect-binary=/path/to/clang-misexpect
+
+Compilation database setup:
+http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html
+"""
+
+from __future__ import print_function
+
+import argparse
+import json
+import multiprocessing
+import os
+import re
+import subprocess
+import sys
+import threading
+
+is_py2 = sys.version[0] == '2'
+
+if is_py2:
+    import Queue as queue
+else:
+    import queue as queue
+
+def find_compilation_database(path):
+  """Adjusts the directory until a compilation database is found."""
+  result = './'
+  while not os.path.isfile(os.path.join(result, path)):
+    if os.path.realpath(result) == '/':
+      print('Error: could not find compilation database.')
+      sys.exit(1)
+    result += '../'
+  return os.path.realpath(result)
+
+
+def make_absolute(f, directory):
+  if os.path.isabs(f):
+    return f
+  return os.path.normpath(os.path.join(directory, f))
+
+
+def get_misexpect_invocation(f, clang_misexpect_binary, build_path,
+                             profile_path, profile_type, extra_arg,
+                             extra_arg_before, quiet):
+  """Gets a command line for clang-misexpect."""
+  start = [clang_misexpect_binary]
+  if profile_path is not None:
+    start.append('-profile-dir=' + profile_path)
+  for arg in extra_arg:
+      start.append('-extra-arg=%s' % arg)
+  for arg in extra_arg_before:
+      start.append('-extra-arg-before=%s' % arg)
+  start.append('-p=' + build_path)
+  start.append('-profile-format=' + profile_type)
+  # make sure we use a standalone executor
+  start.append('-executor=standalone')
+  # Don't repeat profile verification
+  start.append('-verify=none')
+  if quiet:
+      start.append('-quiet')
+  start.append(f)
+  return start
+
+
+def run_misexpect(args, build_path, profile_path, profile_type, queue, lock,
+                  failed_files):
+  """Takes filenames out of queue and runs clang-misexpect on them."""
+  while True:
+    name = queue.get()
+    invocation = get_misexpect_invocation(name, args.clang_misexpect_binary,
+                                     build_path, profile_path, profile_type,
+                                     args.extra_arg, args.extra_arg_before,
+                                     args.quiet)
+    proc = subprocess.Popen(invocation, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    output, err = proc.communicate()
+    if proc.returncode != 0:
+      failed_files.append(name)
+    with lock:
+      sys.stdout.write(' '.join(invocation) + '\n' + output.decode('utf-8'))
+      if len(err) > 0:
+        sys.stdout.flush()
+        sys.stderr.write(err.decode('utf-8'))
+    queue.task_done()
+
+
+def main():
+  parser = argparse.ArgumentParser(description='Runs clang-misexpect over all files '
+                                   'in a compilation database. Requires '
+                                   'clang-misexpect and clang-apply-replacements in '
+                                   '$PATH.')
+  parser.add_argument('-clang-misexpect-binary', metavar='PATH',
+                      default='clang-misexpect',
+                      help='path to clang-misexpect binary')
+  parser.add_argument('-j', type=int, default=0,
+                      help='number of misexpect instances to be run in parallel.')
+  parser.add_argument('files', nargs='*', default=['.*'],
+                      help='files to be processed (regex on path)')
+  parser.add_argument('-p', dest='build_path',
+                      help='Path used to read a compile command database.')
+  parser.add_argument('-profile-path', dest='profile_path',
+                      help='Path used to read a PGO profile.')
+  parser.add_argument('-profile-format', dest='profile_type', default="llvm",
+                      help='PGO profile format.')
+  parser.add_argument('-extra-arg', dest='extra_arg',
+                      action='append', default=[],
+                      help='Additional argument to append to the compiler '
+                      'command line.')
+  parser.add_argument('-extra-arg-before', dest='extra_arg_before',
+                      action='append', default=[],
+                      help='Additional argument to prepend to the compiler '
+                      'command line.')
+  parser.add_argument('-quiet', action='store_true',
+                      help='Run clang-misexpect in quiet mode')
+  args = parser.parse_args()
+
+  db_path = 'compile_commands.json'
+
+  if args.build_path is not None:
+    build_path = args.build_path
+  else:
+    # Find our database
+    build_path = find_compilation_database(db_path)
+
+  if args.profile_path is not None:
+    profile_path = args.profile_path
+  else:
+    profile_path=find_compilation_database("default.profdata")
+
+  profile_type = args.profile_type
+
+  try:
+    # Make sure that the profile is the correct format & compdb exists
+    invocation = [args.clang_misexpect_binary, '-verify=only']
+    invocation.append('-p=' + build_path)
+    invocation.append('-profile-dir=' + profile_path)
+    invocation.append('-profile-format=' + profile_type)
+    if args.quiet:
+      # Even with -quiet we still want to check if we can call clang-tidy.
+      with open(os.devnull, 'w') as dev_null:
+        subprocess.check_call(invocation, stdout=dev_null)
+    else:
+      subprocess.check_call(invocation)
+  except:
+    print("Unable to run clang-misexpect.", file=sys.stderr)
+    sys.exit(1)
+
+
+  # Load the database and extract all files.
+  database = json.load(open(os.path.join(build_path, db_path)))
+  files = [make_absolute(entry['file'], entry['directory'])
+           for entry in database]
+
+  max_task = args.j
+  if max_task == 0:
+    max_task = multiprocessing.cpu_count()
+
+  # Build up a big regexy filter from all command line arguments.
+  file_name_re = re.compile('|'.join(args.files))
+
+  return_code = 0
+  try:
+    # Spin up a bunch of misexpect-launching threads.
+    task_queue = queue.Queue(max_task)
+    # List of files with a non-zero return code.
+    failed_files = []
+    lock = threading.Lock()
+    for _ in range(max_task):
+      t = threading.Thread(target=run_misexpect,
+                           args=(args, build_path, profile_path, profile_type,
+                                 task_queue, lock, failed_files))
+      t.daemon = True
+      t.start()
+
+    # Fill the queue with files.
+    for name in files:
+      if file_name_re.search(name):
+        task_queue.put(name)
+
+    # Wait for all threads to be done.
+    task_queue.join()
+    if len(failed_files):
+      return_code = 1
+
+  except KeyboardInterrupt:
+    # This is a sad hack. Unfortunately subprocess goes
+    # bonkers with ctrl-c and we start forking merrily.
+    print('\nCtrl-C detected, goodbye.')
+    os.kill(0, 9)
+
+  sys.exit(return_code)
+
+if __name__ == '__main__':
+  main()
Index: clang-tools-extra/clang-misexpect/tool/ClangMisExpectMain.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clang-misexpect/tool/ClangMisExpectMain.cpp
@@ -0,0 +1,231 @@
+//===-- ClangMisExpectMain.cpp - ClangMisexpect -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the main function for clang misexpect. It uses a
+// libTooling executor to check each file in the compile_commands.json against
+// a provided PGO profile. When profile counters disagree with the compiler's
+// threshold values for likely and unlikely branches clang-misexpect will issue
+// a diagnostic message.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../ClangMisExpect.h"
+#include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Tooling/AllTUsExecution.h"
+#include "clang/Tooling/ArgumentsAdjusters.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Execution.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Signals.h"
+#include <string>
+
+using namespace clang;
+using namespace clang::tooling;
+using namespace clang::misexpect;
+using namespace llvm;
+using Path = std::string;
+
+enum VerifyType {
+  VerifyOnly,
+  Full,
+  None,
+};
+
+static llvm::cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage);
+static llvm::cl::OptionCategory
+    ClangMisExpectCategory("clang-misexpect options");
+
+static llvm::cl::opt<Path> ProfileDir(
+    "profile-dir",
+    llvm::cl::desc(
+        "Specify a path to the profile data to use during validation"),
+    llvm::cl::cat(ClangMisExpectCategory));
+
+static llvm::cl::opt<ProfileKind> ProfFormat(
+    "profile-format",
+    llvm::cl::desc(
+        "Specify the format of the profile data used during validation"),
+    llvm::cl::init(ProfileKind::IR),
+    llvm::cl::values(clEnumValN(Clang, "clang", "Clang Instrumentation"),
+                     clEnumValN(IR, "llvm", "IR Instrumentation"),
+                     clEnumValN(CSIR, "csllvm",
+                                "Context sensitive IR Instrumentation"),
+                     clEnumValN(Sample, "sample", "Sampling Instrumentation")),
+    llvm::cl::cat(ClangMisExpectCategory));
+
+static llvm::cl::opt<VerifyType> Verification(
+    "verify", llvm::cl::desc("Specify the type of profile format verification"),
+    llvm::cl::init(VerifyType::Full),
+    llvm::cl::values(
+        clEnumValN(VerifyType::VerifyOnly, "only",
+                   "Only checks that the profile format is "
+                   "compatable with the selected options"),
+        clEnumValN(VerifyType::Full, "full",
+                   "Performs full verification before running clang-misexpect "
+                   "checks over compile_commands.json"),
+        clEnumValN(VerifyType::None, "none",
+                   "Skips profile format verification. Useful when running on "
+                   "individual files")),
+    llvm::cl::cat(ClangMisExpectCategory));
+
+namespace {
+
+std::string profileKindToString(ProfileKind k) {
+  switch (k) {
+  case ProfileKind::Clang:
+    return "Frontend based profile from option --profile-format=clang";
+  case ProfileKind::IR:
+    return "IR based profile from option --profile-format=llvm";
+  case ProfileKind::CSIR:
+    return "Context-Sensitive IR based profile from option "
+           "--profile-format=csllvm";
+  case ProfileKind::Sample:
+    return "Sampling based profile from option --profile-format=sample";
+  }
+}
+
+void reportProfileFormatError(ProfileKind k) {
+  auto &OS = llvm::errs();
+  OS.changeColor(raw_ostream::Colors::RED, true);
+  OS << "Error: ";
+  OS.resetColor();
+  OS << "Invalid profile format. Expected " << profileKindToString(ProfFormat)
+     << "\n";
+  exit(1);
+}
+
+// Verifies that the given profile and the given option match.
+void verifyProfileFormat() {
+  if (Verification == VerifyType::None)
+    return;
+
+  auto &OS = llvm::errs();
+  if (ProfFormat == ProfileKind::Sample) {
+    llvm::LLVMContext C;
+    auto ProfReader =
+        llvm::sampleprof::SampleProfileReader::create(ProfileDir, C);
+    if (ProfReader.getError() == llvm::sampleprof_error::unrecognized_format)
+      reportProfileFormatError(ProfFormat);
+  } else {
+    auto ProfReader = InstrProfReader::create(ProfileDir);
+    if (!ProfReader)
+      reportProfileFormatError(ProfFormat);
+
+    auto Err = ProfReader.get()->readHeader();
+    if (Err) {
+      OS.changeColor(raw_ostream::Colors::RED, true);
+      OS << "Error: ";
+      OS.resetColor();
+      OS << llvm::toString(std::move(Err)) << "\n";
+      reportProfileFormatError(ProfFormat);
+    }
+
+    switch (ProfFormat) {
+    case ProfileKind::CSIR: {
+      if (!ProfReader.get()->hasCSIRLevelProfile())
+        reportProfileFormatError(ProfFormat);
+      break;
+    }
+    case ProfileKind::IR: {
+      if (!ProfReader.get()->isIRLevelProfile() &&
+          !ProfReader.get()->hasCSIRLevelProfile())
+        reportProfileFormatError(ProfFormat);
+      break;
+    }
+    case ProfileKind::Clang: {
+      if (ProfReader.get()->isIRLevelProfile()) {
+        reportProfileFormatError(ProfFormat);
+      }
+      break;
+    }
+    case ProfileKind::Sample:
+      llvm_unreachable("Found Sample profile when processing Instr Profiles");
+      break;
+    };
+  }
+}
+
+} // namespace
+
+int main(int argc, const char **argv) {
+  llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
+
+  // TODO: Allow more concurrency when the LLVM backend is threadsafe when used
+  // with libTooling Executors
+  //
+  // clang-misexpect performs checks using the LLVM backend that are accessed
+  // through a CodeGenAction. TSAN revealed that when using an executor some
+  // data races exist when initializing the backend for each compiler invocation
+  // Once these races have been addressed, we can stop limiting concurrency
+  ExecutorConcurrency.setInitialValue(1);
+  ExecutorName.setInitialValue("all-TUs");
+
+  CommonOptionsParser OptionsParser(argc, argv, ClangMisExpectCategory,
+                                    llvm::cl::ZeroOrMore);
+
+  verifyProfileFormat();
+  if (Verification == VerifyType::VerifyOnly)
+    return 0;
+
+  auto &OS = llvm::errs();
+  auto Executor =
+      createExecutorFromCommandLineArgs(argc, argv, ClangMisExpectCategory);
+
+  if (!Executor) {
+    OS << "Failed to create executor --- "
+       << llvm::toString(Executor.takeError()) << "\n";
+    return 1;
+  }
+
+  auto ArgAdjuster = getStripPluginsAdjuster();
+  auto StripProfileWarnings = [](const CommandLineArguments &Args,
+                                 StringRef /*unused*/ Unused) {
+    CommandLineArguments AdjustedArgs;
+    std::set<std::string> FilteredArgs = {"-Wprofile-instr-unprofiled",
+                                          "-fcoverage-mapping", "-Werror"};
+    for (size_t I = 0, E = Args.size(); I != E; I++) {
+      if (FilteredArgs.find(Args[I]) != FilteredArgs.end())
+        continue;
+      AdjustedArgs.push_back(Args[I]);
+    }
+    return AdjustedArgs;
+  };
+
+  ArgAdjuster = combineAdjusters(StripProfileWarnings, ArgAdjuster);
+
+  ArgAdjuster = combineAdjusters(
+      getInsertArgumentAdjuster({"-Wmisexpect", "-Wno-profile-instr-unprofiled",
+                                 "-Wno-profile-instr-out-of-date"},
+                                tooling::ArgumentInsertPosition::END),
+      ArgAdjuster);
+
+  auto Err = Executor->get()->execute(
+      std::make_unique<misexpect::MisExpectFactory>(ProfileDir, ProfFormat),
+      ArgAdjuster);
+  const bool Failed = static_cast<bool>(Err); // Err is consumed below
+  if (Failed) {
+    OS.changeColor(raw_ostream::Colors::RED, true);
+    OS << "Error: ";
+    OS.resetColor();
+    OS << llvm::toString(std::move(Err)) << "\n";
+  }
+
+  // Emit collected data, then report an executor failure via the exit status.
+  Executor->get()->getToolResults()->forEachResult(
+      [&OS](llvm::StringRef Key, llvm::StringRef Value) {
+        OS << "----" << Key.str() << "\n" << Value.str() << "\n";
+      });
+  return Failed ? 1 : 0;
+}
Index: clang-tools-extra/clang-misexpect/tool/CMakeLists.txt
===================================================================
--- /dev/null
+++ clang-tools-extra/clang-misexpect/tool/CMakeLists.txt
@@ -0,0 +1,24 @@
+set(LLVM_LINK_COMPONENTS # LLVM components linked into the clang-misexpect tool.
+  AllTargetsAsmParsers
+  AllTargetsDescs
+  AllTargetsInfos
+  support
+  )
+# Standalone clang-misexpect executable, built from ClangMisExpectMain.cpp.
+add_clang_tool(clang-misexpect
+  ClangMisExpectMain.cpp
+  )
+add_dependencies(clang-misexpect # Build the clang-resource-headers target first.
+  clang-resource-headers
+  )
+target_link_libraries(clang-misexpect # clangMisExpect is the library from the parent directory.
+  PRIVATE
+  clangBasic
+  clangMisExpect
+  clangFrontend
+  clangCodeGen
+  clangTooling
+  clangToolingCore
+  clangToolingSyntax
+  )
+
Index: clang-tools-extra/clang-misexpect/ClangMisExpect.h
===================================================================
--- /dev/null
+++ clang-tools-extra/clang-misexpect/ClangMisExpect.h
@@ -0,0 +1,72 @@
+//===-- ClangMisExpect.h - ClangMisexpect -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the FrontendActionFactory used by the clang-misexpect
+// tool. The factory consumes a compilation database and valid profiling data
+// to run the compiler over a codebase and issue warnings generated from the
+// -Wmisexpect compiler flags.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_MISEXPECT_CLANGMISEXPECT_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_MISEXPECT_CLANGMISEXPECT_H
+
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
+#include "clang/Frontend/FrontendOptions.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "clang/Rewrite/Frontend/FrontendActions.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/StringRef.h"
+#include <memory>
+#include <string>
+
+namespace clang {
+namespace misexpect {
+
+/// Kind of profile data supplied to the tool; selects which profile options
+/// MisExpectFactory::runInvocation sets on each compiler invocation.
+enum ProfileKind {
+  Clang,  ///< Maps to CodeGenOptions::ProfileClangInstr.
+  IR,     ///< Maps to CodeGenOptions::ProfileIRInstr.
+  CSIR,   ///< Maps to CodeGenOptions::ProfileCSIRInstr.
+  Sample, ///< Profile path is used as CodeGenOptions::SampleProfileFile.
+};
+
+/// FrontendActionFactory that points every compiler invocation at a single
+/// profile before running it, so that -Wmisexpect diagnostics can be issued.
+class MisExpectFactory : public tooling::FrontendActionFactory {
+  using Path = std::string;
+
+public:
+  /// \param Profile path of the profile applied to every invocation.
+  /// \param ProfileType kind of profile stored at \p Profile.
+  MisExpectFactory(Path Profile, ProfileKind ProfileType);
+
+  /// Overrides the profile-related options of \p Invocation with the profile
+  /// held by this factory, then defers to the base class implementation.
+  bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
+                     FileManager *Files,
+                     std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+                     DiagnosticConsumer *DiagConsumer) override;
+
+  /// Returns a new EmitLLVMOnlyAction.
+  std::unique_ptr<FrontendAction> create() override;
+
+private:
+  /// Path of the profile applied to each invocation.
+  Path ProfilePath;
+  /// Kind of profile stored at ProfilePath.
+  ProfileKind ProfileType;
+};
+
+} // namespace misexpect
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_MISEXPECT_CLANGMISEXPECT_H
Index: clang-tools-extra/clang-misexpect/ClangMisExpect.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clang-misexpect/ClangMisExpect.cpp
@@ -0,0 +1,102 @@
+//===-- ClangMisExpect.cpp - ClangMisexpect ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a method to create the FrontendActionFactory for the
+// clang-misexpect tool. The factory consumes a compilation database and valid
+// profiling data to run the compiler over a codebase and issue warnings
+// generated from the -Wmisexpect compiler flags.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ClangMisExpect.h"
+#include "clang/Basic/CodeGenOptions.h"
+#include "clang/CodeGen/CodeGenAction.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include <memory>
+#include <utility>
+
+using namespace clang;
+using namespace clang::tooling;
+using namespace misexpect;
+
+#define DEBUG_TYPE "misexpect"
+
+/// Stores the profile path and profile kind applied to every invocation.
+MisExpectFactory::MisExpectFactory(Path ProfilePath, ProfileKind ProfileType)
+    : ProfilePath(std::move(ProfilePath)), ProfileType(ProfileType) {}
+
+/// Every translation unit is processed with an EmitLLVMOnlyAction.
+std::unique_ptr<FrontendAction> MisExpectFactory::create() {
+  return std::make_unique<EmitLLVMOnlyAction>();
+}
+
+/// Rewrites the profile-related options of \p Invocation so that it consumes
+/// the profile given to this factory, then delegates to
+/// FrontendActionFactory::runInvocation and returns its result.
+bool MisExpectFactory::runInvocation(
+    std::shared_ptr<CompilerInvocation> Invocation, FileManager *Files,
+    std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+    DiagnosticConsumer *DiagConsumer) {
+  FrontendOptions &FrontendOpts = Invocation->getFrontendOpts();
+  CodeGenOptions &CodeGenOpts = Invocation->getCodeGenOpts();
+
+  // Only run the compiler through IR generation.
+  FrontendOpts.ProgramAction = frontend::EmitLLVMOnly;
+
+  // Clear the existing profile flags and metadata so that only the profile
+  // supplied to this factory is used.
+  CodeGenOpts.setProfileUse(CodeGenOptions::ProfileNone);
+  CodeGenOpts.setProfileInstr(CodeGenOptions::ProfileNone);
+  CodeGenOpts.ProfileInstrumentUsePath = "";
+  CodeGenOpts.SampleProfileFile = "";
+  // Optimization level must be at least 1 for misexpect warnings and PGO.
+  CodeGenOpts.OptimizationLevel = 1;
+
+  // Duplicate the logic in ExecuteCompilerInvocation to process LLVM options:
+  // build a null-terminated argv whose first entry is a fake program name.
+  const auto &LLVMArgs = FrontendOpts.LLVMArgs;
+  if (!LLVMArgs.empty()) {
+    unsigned NumArgs = LLVMArgs.size();
+    auto Args = std::make_unique<const char *[]>(NumArgs + 2);
+    Args[0] = "clang (LLVM option parsing)";
+    for (unsigned I = 0; I != NumArgs; ++I)
+      Args[I + 1] = LLVMArgs[I].c_str();
+    Args[NumArgs + 1] = nullptr;
+    llvm::cl::ParseCommandLineOptions(NumArgs + 1, Args.get());
+  }
+
+  // Set new profiling options based on the profile type.
+  switch (ProfileType) {
+  case ProfileKind::Clang:
+    CodeGenOpts.setProfileUse(CodeGenOptions::ProfileClangInstr);
+    break;
+  case ProfileKind::IR:
+    CodeGenOpts.setProfileUse(CodeGenOptions::ProfileIRInstr);
+    break;
+  case ProfileKind::CSIR:
+    CodeGenOpts.setProfileUse(CodeGenOptions::ProfileCSIRInstr);
+    break;
+  case ProfileKind::Sample:
+    CodeGenOpts.SampleProfileFile = ProfilePath;
+    break;
+  }
+
+  // Instrumentation-based profiles are passed via ProfileInstrumentUsePath;
+  // sample profiles were already handled above.
+  if (ProfileType != ProfileKind::Sample)
+    CodeGenOpts.ProfileInstrumentUsePath = ProfilePath;
+
+  return FrontendActionFactory::runInvocation(Invocation, Files,
+                                              PCHContainerOps, DiagConsumer);
+}
+
+#undef DEBUG_TYPE
Index: clang-tools-extra/clang-misexpect/CMakeLists.txt
===================================================================
--- /dev/null
+++ clang-tools-extra/clang-misexpect/CMakeLists.txt
@@ -0,0 +1,17 @@
+set(LLVM_LINK_COMPONENTS # LLVM components linked into the clangMisExpect library.
+  Support
+  )
+# Library target built from ClangMisExpect.cpp.
+add_clang_library(clangMisExpect
+  ClangMisExpect.cpp
+# Clang libraries the library links against.
+  LINK_LIBS
+  clangBasic
+  clangCodeGen
+  clangFrontend
+  clangFrontendTool
+  clangTooling
+  clangToolingCore
+  )
+# The standalone clang-misexpect executable is defined in tool/.
+add_subdirectory(tool)
Index: clang-tools-extra/CMakeLists.txt
===================================================================
--- clang-tools-extra/CMakeLists.txt
+++ clang-tools-extra/CMakeLists.txt
@@ -4,6 +4,7 @@
 add_subdirectory(clang-reorder-fields)
 add_subdirectory(modularize)
 add_subdirectory(clang-tidy)
+add_subdirectory(clang-misexpect)
 
 add_subdirectory(clang-change-namespace)
 add_subdirectory(clang-doc)
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to