Modify fix_includes.py for Kudu usage, add a wrapper

This makes a few small changes to the upstream fix_includes.py script:

- We name our tests 'foo-test.cc' instead of 'foo_test.cc'. Expands
  the regex appropriately.

- Add support for 'IWYU pragma: keep' to avoid removing duplicate
  headers. Without this, it was attempting to remove the duplicate
  include in bitshuffle_arch_wrapper.cc which includes the bitshuffle
  header twice with different #defines

- Adds a --blank_line_between_c_and_cxx_includes option which preserves
  the style that we use.

- Adds support for differentiating between thirdparty <foo> includes
  versus system <foo> includes by checking for file existence in a set
  of directories passed on the command line. Google apparently uses
  quoted includes like "protobuf/foo.h" for this purpose whereas we use
  <>.

This also adds a wrapper script build-support/iwyu.py which handles
invoking IWYU and also integrating it with the modified fix_includes.py.

This takes the place of the old 'iwyu.sh' and associated awk script. It
supports automatically piping the IWYU results back through the include
fixer and either producing a diff of errors (default output) or fixing
them in-situ.

Change-Id: I3c286271a39a0d825fb11e5610d8eb7e5b0729b9
Reviewed-on: http://gerrit.cloudera.org:8080/10106
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <a...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b4639690
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b4639690
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b4639690

Branch: refs/heads/master
Commit: b463969044c8d0b434650ff7540026d0011b7fe2
Parents: 767005c
Author: Todd Lipcon <t...@apache.org>
Authored: Wed Apr 18 13:51:58 2018 -0700
Committer: Todd Lipcon <t...@apache.org>
Committed: Fri Apr 20 05:16:34 2018 +0000

----------------------------------------------------------------------
 CMakeLists.txt                              |   6 +-
 README.adoc                                 |  10 +
 build-support/build_source_release.py       |   7 +-
 build-support/iwyu.py                       | 274 +++++++++++++++++++++++
 build-support/iwyu/__init__.py              |   0
 build-support/iwyu/fix_includes.py          | 144 +++++++++---
 build-support/iwyu/iwyu-filter.awk          | 143 ------------
 build-support/iwyu/iwyu.sh                  |  72 ------
 build-support/kudu_util.py                  |  11 +
 build-support/release/rat_exclude_files.txt |   1 +
 src/kudu/cfile/bitshuffle_arch_wrapper.cc   |   2 +-
 11 files changed, 416 insertions(+), 254 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/b4639690/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 03ddbd9..c3bf2b4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1240,11 +1240,13 @@ if (UNIX)
 endif (UNIX)
 
 ############################################################
-# "make iwyu" target
+# "make iwyu" and "make iwyu-fix" target
 ############################################################
 if (UNIX)
-  add_custom_target(iwyu ${BUILD_SUPPORT_DIR}/iwyu/iwyu.sh)
+  add_custom_target(iwyu ${BUILD_SUPPORT_DIR}/iwyu.py --from-git)
+  add_custom_target(iwyu-fix ${BUILD_SUPPORT_DIR}/iwyu.py --fix --from-git)
   add_dependencies(iwyu pb-gen krpc-gen)
+  add_dependencies(iwyu-fix pb-gen krpc-gen)
 endif (UNIX)
 
 ############################################################

http://git-wip-us.apache.org/repos/asf/kudu/blob/b4639690/README.adoc
----------------------------------------------------------------------
diff --git a/README.adoc b/README.adoc
index e3be2d8..22220d8 100644
--- a/README.adoc
+++ b/README.adoc
@@ -289,6 +289,16 @@ $ make iwyu
 This will scan any file which is dirty in your working tree, or changed since 
the last
 gerrit-integrated upstream change in your git log.
 
+If you want to run against a specific file, or against all files, you can use 
the
+`iwyu.py` script:
+
+[source,bash]
+----
+$ ./build-support/iwyu.py
+----
+
+See the output of `iwyu.py --help` for details on various modes of operation.
+
 === Building Kudu documentation
 
 Kudu's documentation is written in asciidoc and lives in the _docs_ 
subdirectory.

http://git-wip-us.apache.org/repos/asf/kudu/blob/b4639690/build-support/build_source_release.py
----------------------------------------------------------------------
diff --git a/build-support/build_source_release.py 
b/build-support/build_source_release.py
index 6f36dd5..35ffa40 100755
--- a/build-support/build_source_release.py
+++ b/build-support/build_source_release.py
@@ -31,10 +31,7 @@ try:
 except ImportError:
   import urllib
 
-from kudu_util import check_output, confirm_prompt, Colors, get_my_email
-
-ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
-GET_UPSTREAM_COMMIT_SCRIPT = os.path.join(ROOT, "build-support", 
"get-upstream-commit.sh")
+from kudu_util import check_output, confirm_prompt, Colors, get_my_email, 
get_upstream_commit, ROOT
 
 
 def check_repo_not_dirty():
@@ -54,7 +51,7 @@ def check_no_local_commits():
   Check that there are no local commits which haven't been pushed to the 
upstream
   repo via Jenkins.
   """
-  upstream_commit = 
check_output(GET_UPSTREAM_COMMIT_SCRIPT).strip().decode('utf-8')
+  upstream_commit = get_upstream_commit()
   cur_commit = check_output(["git", "rev-parse", 
"HEAD"]).strip().decode('utf-8')
 
   if upstream_commit == cur_commit:

http://git-wip-us.apache.org/repos/asf/kudu/blob/b4639690/build-support/iwyu.py
----------------------------------------------------------------------
diff --git a/build-support/iwyu.py b/build-support/iwyu.py
new file mode 100755
index 0000000..4fdcbdd
--- /dev/null
+++ b/build-support/iwyu.py
@@ -0,0 +1,274 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import print_function
+from cStringIO import StringIO
+import glob
+import json
+import logging
+import optparse
+import os
+import re
+import subprocess
+import sys
+
+from kudu_util import get_upstream_commit, check_output, ROOT, Colors
+import iwyu.fix_includes
+from iwyu.fix_includes import ParseAndMergeIWYUOutput
+
+_USAGE = """\
+%prog [--fix] [--sort-only] [--all | --from-git | <path>...]
+
+%prog is a wrapper around include-what-you-use that passes the appropriate
+configuration and filters the output to ignore known issues. In addition,
+it can automatically pipe the output back into the IWYU-provided 
'fix_includes.py'
+script in order to fix any reported issues.
+"""
+
+_MAPPINGS_DIR = os.path.join(ROOT, "build-support/iwyu/mappings/")
+_TOOLCHAIN_DIR = os.path.join(ROOT, "thirdparty/clang-toolchain/bin")
+_IWYU_TOOL = os.path.join(ROOT, "build-support/iwyu/iwyu_tool.py")
+
+# Matches source files that we should run on.
+_RE_SOURCE_FILE = re.compile(r'\.(c|cc|h)$')
+
+# Matches compilation errors in the output of IWYU
+_RE_CLANG_ERROR = re.compile(r'^.+?:\d+:\d+: error:', re.MULTILINE)
+
+# Files that we don't want to ever run IWYU on, because they aren't clean yet.
+_MUTED_FILES = set([
+  "src/kudu/cfile/cfile_reader.h",
+  "src/kudu/cfile/cfile_writer.h",
+  "src/kudu/client/client-internal.h",
+  "src/kudu/client/client-test.cc",
+  "src/kudu/common/encoded_key-test.cc",
+  "src/kudu/common/schema.h",
+  "src/kudu/experiments/rwlock-perf.cc",
+  "src/kudu/hms/hms_catalog.cc",
+  "src/kudu/hms/hms_catalog.h",
+  "src/kudu/hms/hms_client-test.cc",
+  "src/kudu/hms/hms_client.cc",
+  "src/kudu/hms/hms_client.h",
+  "src/kudu/hms/mini_hms.h",
+  "src/kudu/master/catalog_manager.cc",
+  "src/kudu/master/catalog_manager.h",
+  "src/kudu/rpc/reactor.cc",
+  "src/kudu/rpc/reactor.h",
+  "src/kudu/security/ca/cert_management.cc",
+  "src/kudu/security/ca/cert_management.h",
+  "src/kudu/security/cert-test.cc",
+  "src/kudu/security/cert.cc",
+  "src/kudu/security/cert.h",
+  "src/kudu/security/openssl_util.cc",
+  "src/kudu/security/openssl_util.h",
+  "src/kudu/security/tls_context.cc",
+  "src/kudu/security/tls_handshake.cc",
+  "src/kudu/security/tls_socket.h",
+  "src/kudu/security/x509_check_host.cc",
+  "src/kudu/server/default-path-handlers.cc",
+  "src/kudu/server/webserver.cc",
+  "src/kudu/util/bit-util-test.cc",
+  "src/kudu/util/group_varint-test.cc",
+  "src/kudu/util/minidump.cc",
+  "src/kudu/util/mt-metrics-test.cc",
+  "src/kudu/util/process_memory.cc",
+  "src/kudu/util/rle-test.cc"
+])
+
+# Flags to pass to iwyu/fix_includes.py for Kudu-specific style.
+_FIX_INCLUDES_STYLE_FLAGS = [
+  '--blank_lines',
+  '--blank_line_between_c_and_cxx_includes',
+  '--separate_project_includes=kudu/'
+]
+
+# Directory containin the compilation database
+_BUILD_DIR = os.path.join(ROOT, 'build/latest')
+
+def _get_file_list_from_git():
+  upstream_commit = get_upstream_commit()
+  out = check_output(["git", "diff", "--name-only", 
upstream_commit]).splitlines()
+  return [l for l in out if _RE_SOURCE_FILE.search(l)]
+
+
+def _get_paths_from_compilation_db():
+  db_path = os.path.join(_BUILD_DIR, 'compile_commands.json')
+  with open(db_path, 'r') as fileobj:
+    compilation_db = json.load(fileobj)
+  return [entry['file'] for entry in compilation_db]
+
+def _run_iwyu_tool(paths):
+  iwyu_args = ['--max_line_length=256']
+  for m in glob.glob(os.path.join(_MAPPINGS_DIR, "*.imp")):
+    iwyu_args.append("--mapping_file=%s" % os.path.abspath(m))
+
+  cmdline = [_IWYU_TOOL, '-p', _BUILD_DIR]
+  cmdline.extend(paths)
+  cmdline.append('--')
+  cmdline.extend(iwyu_args)
+  # iwyu_tool.py requires include-what-you-use on the path
+  env = os.environ.copy()
+  env['PATH'] = "%s:%s" % (_TOOLCHAIN_DIR, env['PATH'])
+  def crash(output):
+    sys.exit((Colors.RED + "Failed to run IWYU tool.\n\n" + Colors.RESET +
+              Colors.YELLOW + "Command line:\n" + Colors.RESET +
+              "%s\n\n" +
+              Colors.YELLOW + "Output:\n" + Colors.RESET +
+              "%s") % (" ".join(cmdline), output))
+
+  try:
+    output = check_output(cmdline, env=env, stderr=subprocess.STDOUT)
+    if '\nFATAL ERROR: ' in output or _RE_CLANG_ERROR.search(output):
+      crash(output)
+    return output
+  except subprocess.CalledProcessError, e:
+    crash(e.output)
+
+
+def _is_muted(path):
+  assert os.path.isabs(path)
+  rel = os.path.relpath(path, ROOT)
+  return rel in _MUTED_FILES
+
+
+def _filter_paths(paths):
+  return [p for p in paths if not _is_muted(p)]
+
+
+def _relativize_paths(paths):
+  """ Make paths relative to the build directory. """
+  return [os.path.relpath(p, _BUILD_DIR) for p in paths]
+
+
+def _get_thirdparty_include_dirs():
+  return glob.glob(os.path.join(ROOT, "thirdparty", "installed", "*", 
"include"))
+
+
+def _get_fixer_flags(flags):
+  args = ['--quiet', '--nosafe_headers']
+  if flags.dry_run:
+    args.append("--dry_run")
+  for d in _get_thirdparty_include_dirs():
+    args.extend(['--thirdparty_include_dir', d])
+  args.extend(_FIX_INCLUDES_STYLE_FLAGS)
+  fixer_flags, _ = iwyu.fix_includes.ParseArgs(args)
+  return fixer_flags
+
+
+def _do_iwyu(flags, paths):
+  iwyu_output = _run_iwyu_tool(paths)
+  if flags.dump_iwyu_output:
+    logging.info("Dumping iwyu output to %s", flags.dump_iwyu_output)
+    with file(flags.dump_iwyu_output, "w") as f:
+      print(iwyu_output, file=f)
+  stream = StringIO(iwyu_output)
+  fixer_flags = _get_fixer_flags(flags)
+
+  # Passing None as 'fix_paths' tells the fixer script to process
+  # all of the IWYU output, instead of just the output corresponding
+  # to files in 'paths'. This means that if you run this script on a
+  # .cc file, it will also report and fix errors in headers included
+  # by that .cc file.
+  fix_paths = None
+  records = ParseAndMergeIWYUOutput(stream, fix_paths, fixer_flags)
+  unfiltered_count = len(records)
+  records = [r for r in records if not _is_muted(os.path.abspath(r.filename))]
+  if len(records) < unfiltered_count:
+    logging.info("Muted IWYU suggestions on %d file(s)", unfiltered_count - 
len(records))
+  return iwyu.fix_includes.FixManyFiles(records, fixer_flags)
+
+
+def _do_sort_only(flags, paths):
+  fixer_flags = _get_fixer_flags(flags)
+  iwyu.fix_includes.SortIncludesInFiles(paths, fixer_flags)
+
+
+def main(argv):
+  parser = optparse.OptionParser(usage=_USAGE)
+  for i, arg in enumerate(argv):
+    if arg.startswith('-'):
+      argv[i] = argv[i].replace('_', '-')
+
+  parser.add_option('--all', action='store_true',
+                    help=('Process all files listed in the compilation 
database of the current '
+                          'build.'))
+
+  parser.add_option('--from-git', action='store_true',
+                    help=('Determine the list of files to run IWYU 
automatically based on git. '
+                          'All files which are modified in the current working 
tree or in commits '
+                          'not yet committed upstream by gerrit are 
processed.'))
+
+  parser.add_option('--fix', action='store_false', dest="dry_run", 
default=True,
+                    help=('If this is set, fixes IWYU issues in place.'))
+  parser.add_option('-s', '--sort-only', action='store_true',
+                    help=('Just sort #includes of files listed on cmdline;'
+                          ' do not add or remove any #includes'))
+
+  parser.add_option('--dump-iwyu-output', type='str',
+                    help=('A path to dump the raw IWYU output to. This can be 
useful for '
+                          'debugging this tool.'))
+
+  (flags, paths) = parser.parse_args(argv[1:])
+
+  if bool(flags.from_git) + bool(flags.all) + (len(paths) > 0) != 1:
+    sys.exit('Must specify exactly one of --all, --from-git, or a list of 
paths')
+
+  do_filtering = True
+  if flags.from_git:
+    paths = _get_file_list_from_git()
+    paths = [os.path.abspath(os.path.join(ROOT, p)) for p in paths]
+  elif paths:
+    paths = [os.path.abspath(p) for p in paths]
+    # If paths are specified explicitly, don't filter them out.
+    do_filtering = False
+  elif flags.all:
+    paths = _filter_paths(_get_paths_from_compilation_db())
+  else:
+    assert False, "Should not reach here"
+
+  if do_filtering:
+    orig_count = len(paths)
+    paths = _filter_paths(paths)
+    if len(paths) != orig_count:
+      logging.info("Filtered %d paths muted by configuration in iwyu.py",
+                   orig_count - len(paths))
+  else:
+    muted_paths = [p for p in paths if _is_muted(p)]
+    if muted_paths:
+      logging.warning("%d selected path(s) are known to have IWYU issues:" % 
len(muted_paths))
+      for p in muted_paths:
+        logging.warning("   %s" % p)
+
+
+  # IWYU output will be relative to the compilation database which is in
+  # the build directory. In order for the fixer script to properly find them, 
we need
+  # to treat all paths relative to that directory and chdir into it first.
+  paths = _relativize_paths(paths)
+  os.chdir(_BUILD_DIR)
+
+  logging.info("Checking %d file(s)...", len(paths))
+  if flags.sort_only:
+    return _do_sort_only(flags, paths)
+  else:
+    return _do_iwyu(flags, paths)
+
+
+if __name__ == "__main__":
+  logging.basicConfig(level=logging.INFO)
+  sys.exit(main(sys.argv))

http://git-wip-us.apache.org/repos/asf/kudu/blob/b4639690/build-support/iwyu/__init__.py
----------------------------------------------------------------------
diff --git a/build-support/iwyu/__init__.py b/build-support/iwyu/__init__.py
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/kudu/blob/b4639690/build-support/iwyu/fix_includes.py
----------------------------------------------------------------------
diff --git a/build-support/iwyu/fix_includes.py 
b/build-support/iwyu/fix_includes.py
index 37c535f..0407af5 100755
--- a/build-support/iwyu/fix_includes.py
+++ b/build-support/iwyu/fix_includes.py
@@ -9,6 +9,9 @@
 #
 
##===----------------------------------------------------------------------===##
 
+# Modified for Kudu to suit a few idiosyncrasies of the Kudu style.
+# For a detailed changelog, refer to the git log on this file.
+
 from __future__ import print_function
 
 """Update files with the 'correct' #include and forward-declare lines.
@@ -85,6 +88,12 @@ those files are modified.
 The exit code is the number of files that were modified (or that would
 be modified if --dry_run was specified) unless that number exceeds 100,
 in which case 100 is returned.
+
+------------------------------------------------------------
+NOTE: For usage in Kudu, typically this script should be run using the
+wrapper in build-support/fix_includes.py which sets up the arguments
+appropriately for common tasks.
+------------------------------------------------------------
 """
 
 _COMMENT_RE = re.compile(r'\s*//.*')
@@ -121,6 +130,11 @@ _HEADER_GUARD_RE = re.compile(r'$.HEADER_GUARD_RE')
 # about this one.
 _HEADER_GUARD_DEFINE_RE = re.compile(r'\s*#\s*define\s+')
 
+# Infixes used in test filenames like 'foo-test.cc'. These will be
+# canonicalized to be equivalent to  'foo.cc' and thus ensure that 'foo.h'
+# be the first include.
+_TEST_INFIX_RE = re.compile(r'([-_]unittest|[-_]regtest|[-_]test)$')
+
 # We annotate every line in the source file by the re it matches, or None.
 # Note that not all of the above RE's are represented here; for instance,
 # we fold _C_COMMENT_START_RE and _C_COMMENT_END_RE into _COMMENT_LINE_RE.
@@ -147,6 +161,10 @@ _BARRIER_INCLUDES = 
re.compile(r'^\s*#\s*include\s+(<linux/)')
 _SOURCE_EXTENSIONS = [".c", ".C", ".cc", ".CC", ".cxx", ".CXX",
                       ".cpp", ".CPP", ".c++", ".C++", ".cp"]
 
+# Lines matching this pattern will not be deleted even if they look
+# like duplicate includes.
+_IWYU_PRAGMA_KEEP_RE = re.compile(r'IWYU pragma:\s+keep')
+
 
 # Adapt Python 2 iterators to Python 3 syntax
 if sys.version_info[0] < 3:
@@ -1090,6 +1108,8 @@ def _DeleteDuplicateLines(file_lines, line_ranges):
     for line_number in range(*line_range):
       if file_lines[line_number].type in (_BLANK_LINE_RE, _COMMENT_LINE_RE):
         continue
+      if _IWYU_PRAGMA_KEEP_RE.search(file_lines[line_number].line):
+        continue
       uncommented_line = _COMMENT_RE.sub('', file_lines[line_number].line)
       if uncommented_line in seen_lines:
         file_lines[line_number].deleted = True
@@ -1197,9 +1217,10 @@ def _ShouldInsertBlankLine(decorated_move_span, 
next_decorated_move_span,
   if this_kind == next_kind or next_kind == _EOF_KIND:
     return False
 
-  # We also never insert a blank line between C and C++-style #includes,
-  # no matter what the flag value.
-  if (this_kind in [_C_SYSTEM_INCLUDE_KIND, _CXX_SYSTEM_INCLUDE_KIND] and
+  # Unless explicitly requested, we do not insert a blank line between C
+  # and C++-style #includes.
+  if (not flags.blank_line_between_c_and_cxx_includes and
+      this_kind in [_C_SYSTEM_INCLUDE_KIND, _CXX_SYSTEM_INCLUDE_KIND] and
       next_kind in [_C_SYSTEM_INCLUDE_KIND, _CXX_SYSTEM_INCLUDE_KIND]):
     return False
 
@@ -1379,6 +1400,12 @@ def _IsSystemInclude(line_info):
   # The key for #includes includes the <> or "", so this is easy. :-)
   return line_info.type == _INCLUDE_RE and line_info.key[0] == '<'
 
+def _IsThirdpartyInclude(line_info, thirdparty_include_dirs):
+  if line_info.type != _INCLUDE_RE:
+    return False
+  inc_path = line_info.key[1:-1]
+  return any(os.path.exists(os.path.join(inc_dir, inc_path))
+             for inc_dir in thirdparty_include_dirs)
 
 def _IsMainCUInclude(line_info, filename):
   """Given a line-info, return true iff the line is a 'main-CU' #include line.
@@ -1418,7 +1445,7 @@ def _IsMainCUInclude(line_info, filename):
                              '', line_info.key.replace('"', ''))
   # Then normalize includer by stripping extension and Google's test suffixes.
   canonical_file, _ = os.path.splitext(filename)
-  canonical_file = re.sub(r'(_unittest|_regtest|_test)$', '', canonical_file)
+  canonical_file = re.sub(_TEST_INFIX_RE, '', canonical_file)
   # .h files in /public/ match .cc files in /internal/.
   canonical_include2 = re.sub(r'/public/', '/internal/', canonical_include)
 
@@ -1476,17 +1503,28 @@ def _IsSameProject(line_info, edited_file, project):
   return (included_root and edited_root and included_root == edited_root)
 
 
-def _GetLineKind(file_line, filename, separate_project_includes):
-  """Given a file_line + file being edited, return best *_KIND value or 
None."""
+def _GetLineKind(file_line, filename, separate_project_includes, 
thirdparty_include_dirs):
+  """Given a file_line + file being edited, return best *_KIND value or None.
+
+  Arguments:
+    file_line: the LineInfo structure to be analyzed
+    filename: the file which contains the line to be analyzed
+    separate_project_includes: see 'project' parameter of '_IsSameProject()'
+    thirdparty_include_dirs: see help output for the corresponding flag
+  """
   line_without_coments = _COMMENT_RE.sub('', file_line.line)
   if file_line.deleted:
     return None
   elif _IsMainCUInclude(file_line, filename):
     return _MAIN_CU_INCLUDE_KIND
-  elif _IsSystemInclude(file_line) and '.' in line_without_coments:
-    return _C_SYSTEM_INCLUDE_KIND
-  elif _IsSystemInclude(file_line):
-    return _CXX_SYSTEM_INCLUDE_KIND
+  elif _IsSystemInclude(file_line) and \
+         not _IsThirdpartyInclude(file_line, thirdparty_include_dirs):
+    if '.' in line_without_coments:
+      # e.g. <string.h>
+      return _C_SYSTEM_INCLUDE_KIND
+    else:
+      # e.g. <string>
+      return _CXX_SYSTEM_INCLUDE_KIND
   elif file_line.type == _INCLUDE_RE:
     if (separate_project_includes and
         _IsSameProject(file_line, filename, separate_project_includes)):
@@ -1569,7 +1607,8 @@ def _FirstReorderSpanWith(file_lines, good_reorder_spans, 
kind, filename,
   for reorder_span in good_reorder_spans:
     for line_number in range(*reorder_span):
       line_kind = _GetLineKind(file_lines[line_number], filename,
-                               flags.separate_project_includes)
+                               flags.separate_project_includes,
+                               flags.thirdparty_include_dirs)
       # Ignore forward-declares that come after 'contentful' code; we
       # never want to insert new forward-declares there.
       if (line_kind == _FORWARD_DECLARE_KIND and
@@ -1749,7 +1788,8 @@ def _DecoratedMoveSpanLines(iwyu_record, file_lines, 
move_span_lines, flags):
 
   # Next figure out the kind.
   kind = _GetLineKind(firstline, iwyu_record.filename,
-                      flags.separate_project_includes)
+                      flags.separate_project_includes,
+                      flags.thirdparty_include_dirs)
 
   # All we're left to do is the reorder-span we're in.  Hopefully it's easy.
   reorder_span = firstline.reorder_span
@@ -2042,12 +2082,12 @@ def FixManyFiles(iwyu_records, flags):
       if not file_contents:
         continue
 
-      print(">>> Fixing #includes in '%s'" % iwyu_record.filename)
       old_lines, fixed_lines = FixOneFile(iwyu_record, file_contents, flags)
       if old_lines == fixed_lines:
-        print("No changes in file %s" % iwyu_record.filename)
+        if not flags.quiet:
+          print("No changes in file %s" % iwyu_record.filename)
         continue
-
+      print(">>> Fixing #includes in '%s'" % iwyu_record.filename)
       if flags.dry_run:
         PrintFileDiff(old_lines, fixed_lines)
       else:
@@ -2057,18 +2097,19 @@ def FixManyFiles(iwyu_records, flags):
     except FixIncludesError as why:
       print('ERROR: %s - skipping file %s' % (why, iwyu_record.filename))
 
-  print('IWYU edited %d files on your behalf.\n' % files_fixed)
+  print('IWYU %sedited %d files on your behalf.\n' % (
+    flags.dry_run and 'would have ' or '', files_fixed))
   return files_fixed
 
 
-def ProcessIWYUOutput(f, files_to_process, flags):
-  """Fix the #include and forward-declare lines as directed by f.
-
+def ParseAndMergeIWYUOutput(f, files_to_process, flags):
+  """
   Given a file object that has the output of the include_what_you_use
-  script, see every file to be edited and edit it, if appropriate.
+  script, parse this output into a list of IWYUOutputRecord objects.
+  The records are merged so that files mentioned multiple times only
+  result in a single record.
 
   Arguments:
-    f: an iterable object that is the output of include_what_you_use.
     files_to_process: A set of filenames, or None.  If not None, we
        ignore files mentioned in f that are not in files_to_process.
     flags: commandline flags, as parsed by optparse.  The only flag
@@ -2076,9 +2117,7 @@ def ProcessIWYUOutput(f, files_to_process, flags):
        process; we also pass the flags to other routines.
 
   Returns:
-    The number of files that had to be modified (because they weren't
-    already all correct).  In dry_run mode, returns the number of
-    files that would have been modified.
+    The list of IWYUOutputRecord objects.
   """
   # First collect all the iwyu data from stdin.
 
@@ -2113,13 +2152,36 @@ def ProcessIWYUOutput(f, files_to_process, flags):
   # file, but not another, and we need to have merged them above.)
   for filename in iwyu_output_records:
     if not iwyu_output_records[filename].HasContentfulChanges():
-      print('(skipping %s: iwyu reports no contentful changes)' % filename)
+      if not flags.quiet:
+        print('(skipping %s: iwyu reports no contentful changes)' % filename)
       # Mark that we're skipping this file by setting the record to None
       iwyu_output_records[filename] = None
 
+  return [ior for ior in iwyu_output_records.values() if ior]
+
+
+def ProcessIWYUOutput(f, files_to_process, flags):
+  """Fix the #include and forward-declare lines as directed by f.
+
+  Given a file object that has the output of the include_what_you_use
+  script, see every file to be edited and edit it, if appropriate.
+
+  Arguments:
+    f: an iterable object that is the output of include_what_you_use.
+    files_to_process: A set of filenames, or None.  If not None, we
+       ignore files mentioned in f that are not in files_to_process.
+    flags: commandline flags, as parsed by optparse.  The only flag
+       we use directly is flags.ignore_re, to indicate files not to
+       process; we also pass the flags to other routines.
+
+  Returns:
+    The number of files that had to be modified (because they weren't
+    already all correct).  In dry_run mode, returns the number of
+    files that would have been modified.
+  """
+  records = ParseAndMergeIWYUOutput(f, files_to_process, flags)
   # Now do all the fixing, and return the number of files modified
-  contentful_records = [ior for ior in iwyu_output_records.values() if ior]
-  return FixManyFiles(contentful_records, flags)
+  return FixManyFiles(records, flags)
 
 
 def SortIncludesInFiles(files_to_process, flags):
@@ -2148,9 +2210,8 @@ def SortIncludesInFiles(files_to_process, flags):
     sort_only_iwyu_records.append(IWYUOutputRecord(filename))
   return FixManyFiles(sort_only_iwyu_records, flags)
 
-
-def main(argv):
-  # Parse the command line.
+def ParseArgs(args):
+  """ Parse the command line. """
   parser = optparse.OptionParser(usage=_USAGE)
   parser.add_option('-b', '--blank_lines', action='store_true', default=True,
                     help=('Put a blank line between primary header file and'
@@ -2159,6 +2220,12 @@ def main(argv):
                           ' [default]'))
   parser.add_option('--noblank_lines', action='store_false', 
dest='blank_lines')
 
+  parser.add_option('--blank_line_between_c_and_cxx_includes',
+                    action='store_true', default=False,
+                    help=('Put a blank line between the group of C system '
+                          'includes and C++ system includes. Not enabled '
+                          'by default.'))
+
   parser.add_option('--comments', action='store_true', default=False,
                     help='Put comments after the #include lines')
   parser.add_option('--nocomments', action='store_false', dest='comments')
@@ -2179,6 +2246,10 @@ def main(argv):
                           ' else min(the number of files that would be'
                           ' modified, 100)'))
 
+  parser.add_option('-q', '--quiet', action='store_true', default=False,
+                    help=('Do not output anything about files that do not '
+                          'need any changes.'))
+
   parser.add_option('--ignore_re', default=None,
                     help=('fix_includes.py will skip editing any file whose'
                           ' name matches this regular expression.'))
@@ -2192,6 +2263,13 @@ def main(argv):
                           ' same project.  If not specified, project #includes'
                           ' will be sorted with other non-system #includes.'))
 
+  parser.add_option('--thirdparty_include_dir', action="append",
+                    dest="thirdparty_include_dirs",
+                    default=[],
+                    metavar="dir",
+                    help=('Any includes which are found in <dir> are 
considered '
+                          '"other-project" includes rather than "system" 
includes.'))
+
   parser.add_option('--invoking_command_line', default=None,
                     help=('Internal flag used by iwyu.py, It should be the'
                           ' command line used to invoke iwyu.py'))
@@ -2206,7 +2284,11 @@ def main(argv):
   parser.add_option('--nokeep_iwyu_namespace_format', action='store_false',
                     dest='keep_iwyu_namespace_format')
 
-  (flags, files_to_modify) = parser.parse_args(argv[1:])
+  (flags, files_to_modify) = parser.parse_args(args)
+  return flags, files_to_modify
+
+def main(argv):
+  (flags, files_to_modify) = ParseArgs(argv[1:])
   if files_to_modify:
     files_to_modify = set(files_to_modify)
   else:

http://git-wip-us.apache.org/repos/asf/kudu/blob/b4639690/build-support/iwyu/iwyu-filter.awk
----------------------------------------------------------------------
diff --git a/build-support/iwyu/iwyu-filter.awk 
b/build-support/iwyu/iwyu-filter.awk
deleted file mode 100644
index 0bd1034..0000000
--- a/build-support/iwyu/iwyu-filter.awk
+++ /dev/null
@@ -1,143 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-#
-# This is an awk script to process output from the include-what-you-use (IWYU)
-# tool. As of now, IWYU is of alpha quality and it gives many incorrect
-# recommendations -- obviously invalid or leading to compilation breakage.
-# Most of those can be silenced using appropriate IWYU pragmas, but it's not
-# the case for the auto-generated files.
-#
-# Also, it's possible to address invalid recommendation using mappings:
-#   
https://github.com/include-what-you-use/include-what-you-use/blob/master/docs/IWYUMappings.md
-#
-# We are using mappings for the boost library (comes with IWYU) and a few
-# custom mappings for gflags, glog, gtest and other libraries to address some
-# IWYU quirks (hopefully, those should be resolved as IWYU gets better).
-# The kudu.imp mappings file is used to provide Kudu-specific mappings.
-#
-# To run the IWYU tool on every C++ source file in the project,
-# use the recipe below.
-#
-#  1. Run the CMake with -DCMAKE_CXX_INCLUDE_WHAT_YOU_USE=<iwyu_cmd_line>
-#
-#     The path to the IWYU binary should be absolute. The path to the binary
-#     and the command-line options should be separated by semicolon
-#     (that's for feeding it into CMake list variables).
-#
-#     Below is the set of commands to run from the build directory to configure
-#     the build accordingly (line breaks are for better readability):
-#
-#     CC=../../thirdparty/clang-toolchain/bin/clang
-#     CXX=../../thirdparty/clang-toolchain/bin/clang++
-#     IWYU="`pwd`/../../thirdparty/clang-toolchain/bin/include-what-you-use;\
-#       -Xiwyu;--max_line_length=256;\
-#       
-Xiwyu;--mapping_file=`pwd`/../../build-support/iwyu/mappings/boost-all.imp;\
-#       
-Xiwyu;--mapping_file=`pwd`/../../build-support/iwyu/mappings/boost-all-private.imp;\
-#       
-Xiwyu;--mapping_file=`pwd`/../../build-support/iwyu/mappings/boost-extra.imp;\
-#       
-Xiwyu;--mapping_file=`pwd`/../../build-support/iwyu/mappings/gtest.imp;\
-#       
-Xiwyu;--mapping_file=`pwd`/../../build-support/iwyu/mappings/glog.imp;\
-#       
-Xiwyu;--mapping_file=`pwd`/../../build-support/iwyu/mappings/gflags.imp;\
-#       
-Xiwyu;--mapping_file=`pwd`/../../build-support/iwyu/mappings/kudu.imp;\
-#       
-Xiwyu;--mapping_file=`pwd`/../../build-support/iwyu/mappings/libstdcpp.imp\
-#       
-Xiwyu;--mapping_file=`pwd`/../../build-support/iwyu/mappings/system-linux.imp"
-#
-#     ../../build-support/enable_devtoolset.sh \
-#       env CC=$CC CXX=$CXX \
-#       ../../thirdparty/installed/common/bin/cmake \
-#       -DCMAKE_CXX_INCLUDE_WHAT_YOU_USE=\"$IWYU\" \
-#       ../..
-#
-#     NOTE:
-#       Since the Kudu code has some 'ifdef NDEBUG' directives, it's possible
-#       that IWYU would produce different results if run against release, not
-#       debug build. As of now, the tool is used in debug builds only.
-#
-#  2. Run make, separating the output from the IWYU tool into a separate file
-#     (it's possible to use piping the output from the tool to the script
-#      but having a file is good for future reference, if necessary):
-#
-#     make -j$(nproc) 2>/tmp/iwyu.log
-#
-#  3. Process the output from the IWYU tool using the script:
-#
-#     awk -f ../../build-support/iwyu/iwyu-filter.awk /tmp/iwyu.log
-#
-
-BEGIN {
-  # This is the list of the files for which the suggestions from IWYU are
-  # ignored. Eventually, this list should become empty as soon as all the valid
-  # suggestions are addressed and invalid ones are taken care either by proper
-  # IWYU pragmas or adding special mappings (e.g. like boost mappings).
-  muted["kudu/cfile/cfile_reader.h"]
-  muted["kudu/cfile/cfile_writer.h"]
-  muted["kudu/client/client-internal.h"]
-  muted["kudu/client/client-test.cc"]
-  muted["kudu/common/encoded_key-test.cc"]
-  muted["kudu/common/schema.h"]
-  muted["kudu/experiments/rwlock-perf.cc"]
-  muted["kudu/hms/hms_catalog.cc"]
-  muted["kudu/hms/hms_catalog.h"]
-  muted["kudu/hms/hms_client-test.cc"]
-  muted["kudu/hms/hms_client.cc"]
-  muted["kudu/hms/hms_client.h"]
-  muted["kudu/hms/mini_hms.h"]
-  muted["kudu/master/catalog_manager.cc"]
-  muted["kudu/master/catalog_manager.h"]
-  muted["kudu/rpc/reactor.cc"]
-  muted["kudu/rpc/reactor.h"]
-  muted["kudu/security/ca/cert_management.cc"]
-  muted["kudu/security/ca/cert_management.h"]
-  muted["kudu/security/cert-test.cc"]
-  muted["kudu/security/cert.cc"]
-  muted["kudu/security/cert.h"]
-  muted["kudu/security/openssl_util.cc"]
-  muted["kudu/security/openssl_util.h"]
-  muted["kudu/security/tls_context.cc"]
-  muted["kudu/security/tls_handshake.cc"]
-  muted["kudu/security/tls_socket.h"]
-  muted["kudu/security/x509_check_host.cc"]
-  muted["kudu/server/default-path-handlers.cc"]
-  muted["kudu/server/webserver.cc"]
-  muted["kudu/util/bit-util-test.cc"]
-  muted["kudu/util/group_varint-test.cc"]
-  muted["kudu/util/minidump.cc"]
-  muted["kudu/util/mt-metrics-test.cc"]
-  muted["kudu/util/process_memory.cc"]
-  muted["kudu/util/rle-test.cc"]
-}
-
-# mute all suggestions for the auto-generated files
-/.*\.(pb|proxy|service)\.(cc|h) should (add|remove) these lines:/, /^$/ {
-  next
-}
-
-# mute suggestions for the explicitly specified files
-/.* should (add|remove) these lines:/ {
-  do_print = 1
-  for (path in muted) {
-    if (index($0, path)) {
-      do_print = 0
-      break
-    }
-  }
-}
-/^$/ {
-  if (do_print) print
-  do_print = 0
-}
-{ if (do_print) print }

http://git-wip-us.apache.org/repos/asf/kudu/blob/b4639690/build-support/iwyu/iwyu.sh
----------------------------------------------------------------------
diff --git a/build-support/iwyu/iwyu.sh b/build-support/iwyu/iwyu.sh
deleted file mode 100755
index 417cdf3..0000000
--- a/build-support/iwyu/iwyu.sh
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -e
-set -o pipefail
-
-ROOT=$(cd $(dirname $BASH_SOURCE)/../..; pwd)
-
-# Build the list of updated files which are of IWYU interest.
-# Since '-e' is set, transform the exit code from the grep accordingly:
-# grep returns 1 if no lines were selected.
-file_list_tmp=$(git diff --name-only \
-    $($ROOT/build-support/get-upstream-commit.sh) | \
-    (grep -E '\.(c|cc|h)$' || [ $? -eq 1 ]))
-if [ -z "$file_list_tmp" ]; then
-  echo "IWYU verification: no updates on related files, declaring success"
-  exit 0
-fi
-
-IWYU_LOG=$(mktemp -t kudu-iwyu.XXXXXX)
-UNFILTERED_IWYU_LOG=${IWYU_LOG}.unfiltered
-trap "rm -f $IWYU_LOG $UNFILTERED_IWYU_LOG" EXIT
-
-# Adjust the path for every element in the list. The iwyu_tool.py normalizes
-# paths (via realpath) to match the records from the compilation database.
-IWYU_FILE_LIST=
-for p in $file_list_tmp; do
-  IWYU_FILE_LIST="$IWYU_FILE_LIST $ROOT/$p"
-done
-
-IWYU_ARGS="--max_line_length=256"
-
-IWYU_MAPPINGS_PATH="$ROOT/build-support/iwyu/mappings"
-for path in $IWYU_MAPPINGS_PATH/*.imp ; do
-    IWYU_ARGS="$IWYU_ARGS --mapping_file=$path"
-done
-
-if ! PATH="$PATH:$PWD/../../thirdparty/clang-toolchain/bin" \
-    python $ROOT/build-support/iwyu/iwyu_tool.py -p . $IWYU_FILE_LIST -- \
-    $IWYU_ARGS > $UNFILTERED_IWYU_LOG 2>&1; then
-  echo "IWYU verification: failed to run the tool, see below for details"
-  cat $UNFILTERED_IWYU_LOG
-  exit 2
-fi
-
-awk -f $ROOT/build-support/iwyu/iwyu-filter.awk $UNFILTERED_IWYU_LOG | \
-    tee $IWYU_LOG
-if [ -s "$IWYU_LOG" ]; then
-  # The output is not empty: IWYU finds the set of headers to be inconsistent.
-  echo "IWYU verification: changelist needs correction, see above for details"
-  exit 1
-fi
-
-# The output is empty: the changelist looks good.
-echo "IWYU verification: the changes look good"
-exit 0

http://git-wip-us.apache.org/repos/asf/kudu/blob/b4639690/build-support/kudu_util.py
----------------------------------------------------------------------
diff --git a/build-support/kudu_util.py b/build-support/kudu_util.py
index 284042c..eac5115 100644
--- a/build-support/kudu_util.py
+++ b/build-support/kudu_util.py
@@ -26,12 +26,23 @@ import os
 import subprocess
 import sys
 
+ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+
+_GET_UPSTREAM_COMMIT_SCRIPT = os.path.join(ROOT, "build-support", 
"get-upstream-commit.sh")
+
+
 # Alias raw_input() to input() in Python 2.
 try:
   input = raw_input
 except NameError:
   pass
 
+
+def get_upstream_commit():
+  """ Return the last commit hash that appears to have been committed by 
gerrit. """
+  return check_output(_GET_UPSTREAM_COMMIT_SCRIPT).strip().decode('utf-8')
+
+
 class Colors(object):
   """ ANSI color codes. """
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/b4639690/build-support/release/rat_exclude_files.txt
----------------------------------------------------------------------
diff --git a/build-support/release/rat_exclude_files.txt 
b/build-support/release/rat_exclude_files.txt
index 1ea17fc..4742d64 100644
--- a/build-support/release/rat_exclude_files.txt
+++ b/build-support/release/rat_exclude_files.txt
@@ -8,6 +8,7 @@ pax_global_header
 *.bib
 *.pdf
 version.txt
+build-support/iwyu/__init__.py
 build-support/iwyu/fix_includes.py
 build-support/iwyu/iwyu_tool.py
 build-support/iwyu/mappings/boost-all-private.imp

http://git-wip-us.apache.org/repos/asf/kudu/blob/b4639690/src/kudu/cfile/bitshuffle_arch_wrapper.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/bitshuffle_arch_wrapper.cc 
b/src/kudu/cfile/bitshuffle_arch_wrapper.cc
index 0565484..4799cd5 100644
--- a/src/kudu/cfile/bitshuffle_arch_wrapper.cc
+++ b/src/kudu/cfile/bitshuffle_arch_wrapper.cc
@@ -29,7 +29,7 @@
 #define bshuf_compress_lz4_bound bshuf_compress_lz4_bound_avx2
 #define bshuf_compress_lz4 bshuf_compress_lz4_avx2
 #define bshuf_decompress_lz4 bshuf_decompress_lz4_avx2
-#include <bitshuffle.h> // NOLINT(*)
+#include <bitshuffle.h> // NOLINT(*) IWYU pragma: keep
 #undef bshuf_compress_lz4_bound
 #undef bshuf_compress_lz4
 #undef bshuf_decompress_lz4

Reply via email to