This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new a665b82 ARROW-4123: [C++] Enable linting tools to be run on Windows
a665b82 is described below
commit a665b829124ffa3850b30c6b2753088785471ac5
Author: Benjamin Kietzman <[email protected]>
AuthorDate: Sun Jan 20 10:22:00 2019 -0600
ARROW-4123: [C++] Enable linting tools to be run on Windows
* replaced lint and tidy targets with python scripts, ala
run_clang_format.py
* factored out some common functionality into a helper module (`lintutils`)
for those scripts
* add (windows) default LLVM install directory to FindClangTools' search
path
Author: Benjamin Kietzman <[email protected]>
Closes #3374 from bkietz/ARROW-4123-improve-windows-linting and squashes
the following commits:
c0bec4ea8 <Benjamin Kietzman> Rework linting into Python scripts so they
will work on Windows
---
cpp/CMakeLists.txt | 107 +++++++-------
...g_format_exclusions.txt => lint_exclusions.txt} | 0
cpp/build-support/lintutils.py | 115 ++++++++++++++++
cpp/build-support/run-clang-tidy.sh | 45 ------
cpp/build-support/run_clang_format.py | 153 ++++++++++-----------
cpp/build-support/run_clang_tidy.py | 121 ++++++++++++++++
cpp/build-support/run_cpplint.py | 123 +++++++++++++++++
cpp/cmake_modules/FindClangTools.cmake | 49 +++----
cpp/cmake_modules/FindInferTools.cmake | 4 +-
dev/release/rat_exclude_files.txt | 2 +-
r/lint.sh | 7 +-
11 files changed, 520 insertions(+), 206 deletions(-)
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index f673b52..459cb3a 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -64,6 +64,12 @@ if(POLICY CMP0054)
cmake_policy(SET CMP0054 NEW)
endif()
+# don't ignore <PackageName>_ROOT variables in find_package
+if(POLICY CMP0074)
+ # https://cmake.org/cmake/help/v3.12/policy/CMP0074.html
+ cmake_policy(SET CMP0074 NEW)
+endif()
+
set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support")
set(CLANG_FORMAT_VERSION "6.0")
@@ -82,6 +88,7 @@ if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1" OR
INFER_FOUND)
# See http://clang.llvm.org/docs/JSONCompilationDatabase.html
set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
endif()
+
# ----------------------------------------------------------------------
# cmake options
@@ -356,6 +363,9 @@ that have not been built"
OFF)
endif()
+# Needed for linting targets, etc.
+find_package(PythonInterp)
+
if (ARROW_USE_CCACHE)
find_program(CCACHE_FOUND ccache)
if(CCACHE_FOUND)
@@ -379,65 +389,68 @@ if (NOT ARROW_VERBOSE_LINT)
set(ARROW_LINT_QUIET "--quiet")
endif()
-if (UNIX)
+if (NOT LINT_EXCLUSIONS_FILE)
+ # source files matching a glob from a line in this file
+ # will be excluded from linting (cpplint, clang-tidy, clang-format)
+ set(LINT_EXCLUSIONS_FILE ${BUILD_SUPPORT_DIR}/lint_exclusions.txt)
+endif()
- file(GLOB_RECURSE LINT_FILES
- "${CMAKE_CURRENT_SOURCE_DIR}/src/*.h"
- "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cc"
- )
-
- FOREACH(item ${LINT_FILES})
- IF(NOT ((item MATCHES "_generated.h") OR
- (item MATCHES "pyarrow_api.h") OR
- (item MATCHES "pyarrow_lib.h") OR
- (item MATCHES "config.h") OR
- (item MATCHES "vendored/") OR
- (item MATCHES "zmalloc.h") OR
- (item MATCHES "ae.h")))
- LIST(APPEND FILTERED_LINT_FILES ${item})
- ENDIF()
- ENDFOREACH(item ${LINT_FILES})
-
- find_program(CPPLINT_BIN NAMES cpplint cpplint.py HINTS ${BUILD_SUPPORT_DIR})
- message(STATUS "Found cpplint executable at ${CPPLINT_BIN}")
-
- # Full lint
- # Balancing act: cpplint.py takes a non-trivial time to launch,
- # so process 12 files per invocation, while still ensuring parallelism
- add_custom_target(lint echo ${FILTERED_LINT_FILES} | xargs -n12 -P8
- ${CPPLINT_BIN}
- --verbose=2 ${ARROW_LINT_QUIET}
- --linelength=90
-
--filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/c++11,-runtime/references,-build/include_order
- )
-endif (UNIX)
+find_program(CPPLINT_BIN NAMES cpplint cpplint.py HINTS ${BUILD_SUPPORT_DIR})
+message(STATUS "Found cpplint executable at ${CPPLINT_BIN}")
+
+add_custom_target(lint
+ ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/run_cpplint.py
+ --cpplint_binary ${CPPLINT_BIN}
+ --exclude_globs ${LINT_EXCLUSIONS_FILE}
+ --source_dir ${CMAKE_CURRENT_SOURCE_DIR}/src
+ ${ARROW_LINT_QUIET})
############################################################
# "make format" and "make check-format" targets
############################################################
-
-# runs clang format and updates files in place.
-add_custom_target(format ${BUILD_SUPPORT_DIR}/run_clang_format.py
- ${CLANG_FORMAT_BIN}
- ${BUILD_SUPPORT_DIR}/clang_format_exclusions.txt
- ${CMAKE_CURRENT_SOURCE_DIR}/src --fix ${ARROW_LINT_QUIET})
-
-# runs clang format and exits with a non-zero exit code if any files need to
be reformatted
-add_custom_target(check-format ${BUILD_SUPPORT_DIR}/run_clang_format.py
- ${CLANG_FORMAT_BIN}
- ${BUILD_SUPPORT_DIR}/clang_format_exclusions.txt
- ${CMAKE_CURRENT_SOURCE_DIR}/src ${ARROW_LINT_QUIET})
+if (${CLANG_FORMAT_FOUND})
+ # runs clang format and updates files in place.
+ add_custom_target(format
+ ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/run_clang_format.py
+ --clang_format_binary ${CLANG_FORMAT_BIN}
+ --exclude_globs ${LINT_EXCLUSIONS_FILE}
+ --source_dir ${CMAKE_CURRENT_SOURCE_DIR}/src
+ --fix
+ ${ARROW_LINT_QUIET})
+
+ # runs clang format and exits with a non-zero exit code if any files need to
be reformatted
+ add_custom_target(check-format
+ ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/run_clang_format.py
+ --clang_format_binary ${CLANG_FORMAT_BIN}
+ --exclude_globs ${LINT_EXCLUSIONS_FILE}
+ --source_dir ${CMAKE_CURRENT_SOURCE_DIR}/src
+ ${ARROW_LINT_QUIET})
+endif()
############################################################
# "make clang-tidy" and "make check-clang-tidy" targets
############################################################
if (${CLANG_TIDY_FOUND})
+ # TODO check to make sure .clang-tidy is being respected
+
# runs clang-tidy and attempts to fix any warning automatically
- add_custom_target(clang-tidy ${BUILD_SUPPORT_DIR}/run-clang-tidy.sh
${CLANG_TIDY_BIN} ${CMAKE_BINARY_DIR}/compile_commands.json 1
- `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc | sed -e '/_generated/g'`)
+ add_custom_target(clang-tidy
+ ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/run_clang_tidy.py
+ --clang_tidy_binary ${CLANG_TIDY_BIN}
+ --exclude_globs ${LINT_EXCLUSIONS_FILE}
+ --compile_commands ${CMAKE_BINARY_DIR}/compile_commands.json
+ --source_dir ${CMAKE_CURRENT_SOURCE_DIR}/src
+ --fix
+ ${ARROW_LINT_QUIET})
+
# runs clang-tidy and exits with a non-zero exit code if any errors are
found.
- add_custom_target(check-clang-tidy ${BUILD_SUPPORT_DIR}/run-clang-tidy.sh
${CLANG_TIDY_BIN} ${CMAKE_BINARY_DIR}/compile_commands.json
- 0 `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc |grep -v -F -f
${CMAKE_CURRENT_SOURCE_DIR}/src/.clang-tidy-ignore | sed -e '/_generated/g'`)
+ add_custom_target(check-clang-tidy
+ ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/run_clang_tidy.py
+ --clang_tidy_binary ${CLANG_TIDY_BIN}
+ --exclude_globs ${LINT_EXCLUSIONS_FILE}
+ --compile_commands ${CMAKE_BINARY_DIR}/compile_commands.json
+ --source_dir ${CMAKE_CURRENT_SOURCE_DIR}/src
+ ${ARROW_LINT_QUIET})
endif()
if (ARROW_ONLY_LINT)
diff --git a/cpp/build-support/clang_format_exclusions.txt
b/cpp/build-support/lint_exclusions.txt
similarity index 100%
rename from cpp/build-support/clang_format_exclusions.txt
rename to cpp/build-support/lint_exclusions.txt
diff --git a/cpp/build-support/lintutils.py b/cpp/build-support/lintutils.py
new file mode 100644
index 0000000..1657cf2
--- /dev/null
+++ b/cpp/build-support/lintutils.py
@@ -0,0 +1,115 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+from fnmatch import fnmatch
+from pathlib import Path
+from subprocess import Popen, CompletedProcess
+from collections.abc import Mapping, Sequence
+
+
+def chunk(seq, n):
+ """
+ divide a sequence into equal sized chunks
+ (the last chunk may be smaller, but won't be empty)
+ """
+ some = []
+ for element in seq:
+ if len(some) == n:
+ yield some
+ some = []
+ else:
+ some.append(element)
+ if len(some) > 0:
+ yield some
+
+
+def dechunk(chunks):
+ "flatten chunks into a single list"
+ seq = []
+ for chunk in chunks:
+ seq.extend(chunk)
+ return seq
+
+
+def run_parallel(cmds, **kwargs):
+ """
+ run each of cmds (with shared **kwargs) using subprocess.Popen
+ then wait for all of them to complete
+ returns a list of each command's CompletedProcess
+ """
+ procs = []
+ for cmd in cmds:
+ if not isinstance(cmd, Sequence):
+ continue
+ procs.append(Popen(cmd, **kwargs))
+
+ for cmd in cmds:
+ if not isinstance(cmd, Mapping):
+ continue
+ cmd_kwargs = kwargs.copy()
+ cmd_kwargs.update(cmd)
+ procs.append(Popen(**cmd_kwargs))
+
+ complete = []
+ for proc in procs:
+ result = proc.communicate()
+ c = CompletedProcess(proc.args, proc.returncode)
+ c.stdout, c.stderr = result
+ complete.append(c)
+ return complete
+
+
+_source_extensions = '''
+.h
+.cc
+'''.split()
+
+
+def get_sources(source_dir, exclude_globs=[]):
+ sources = []
+ for directory, subdirs, basenames in os.walk(source_dir):
+ for path in [Path(directory) / basename for basename in basenames]:
+ # filter out non-source files
+ if path.suffix not in _source_extensions:
+ continue
+
+ # filter out files that match the globs in the globs file
+ if any([fnmatch(str(path), glob) for glob in exclude_globs]):
+ continue
+
+ sources.append(path.resolve())
+ return sources
+
+
+def stdout_pathcolonline(completed_process, filenames):
+ """
+ given a completed process which may have reported some files as problematic
+ by printing the path name followed by ':' then a line number, examine
+ stdout and return the set of actually reported file names
+ """
+ bfilenames = set()
+ for filename in filenames:
+ bfilenames.add(filename.encode('utf-8') + b':')
+ problem_files = set()
+ for line in completed_process.stdout.splitlines():
+ for filename in bfilenames:
+ if line.startswith(filename):
+ problem_files.add(filename.decode('utf-8'))
+ bfilenames.remove(filename)
+ break
+ return problem_files, completed_process.stdout
diff --git a/cpp/build-support/run-clang-tidy.sh
b/cpp/build-support/run-clang-tidy.sh
deleted file mode 100755
index 75e9458..0000000
--- a/cpp/build-support/run-clang-tidy.sh
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#
-# Runs clang format in the given directory
-# Arguments:
-# $1 - Path to the clang tidy binary
-# $2 - Path to the compile_commands.json to use
-# $3 - Apply fixes (will raise an error if false and not there where changes)
-# $ARGN - Files to run clang-tidy on
-#
-CLANG_TIDY=$1
-shift
-COMPILE_COMMANDS=$1
-shift
-APPLY_FIXES=$1
-shift
-
-# clang format will only find its configuration if we are in
-# the source tree or in a path relative to the source tree
-if [ "$APPLY_FIXES" == "1" ]; then
- $CLANG_TIDY -p $COMPILE_COMMANDS -fix $@
-else
- NUM_CORRECTIONS=`$CLANG_TIDY -p $COMPILE_COMMANDS $@ 2>&1 | grep -v Skipping
| grep "warnings* generated" | wc -l`
- if [ "$NUM_CORRECTIONS" -gt "0" ]; then
- echo "clang-tidy had suggested fixes. Please fix these!!!"
- exit 1
- fi
-fi
diff --git a/cpp/build-support/run_clang_format.py
b/cpp/build-support/run_clang_format.py
index 24dcabb..8d1bb23 100755
--- a/cpp/build-support/run_clang_format.py
+++ b/cpp/build-support/run_clang_format.py
@@ -16,74 +16,51 @@
# specific language governing permissions and limitations
# under the License.
+import lintutils
+from subprocess import PIPE
import argparse
import difflib
-import fnmatch
import multiprocessing as mp
-import os
-import subprocess
import sys
+from functools import partial
-class FileChecker(object):
+# examine the output of clang-format and if changes are
+# present assemble a (unified)patch of the difference
+def _check_one_file(completed_processes, filename):
+ with open(filename, "rb") as reader:
+ original = reader.read()
- def __init__(self, arguments):
- self.quiet = arguments.quiet
- self.clang_format_binary = arguments.clang_format_binary
-
- def run(self, filename):
- if not self.quiet:
- print("Checking {}".format(filename))
- #
- # Due to some incompatibilities between Python 2 and
- # Python 3, there are some specific actions we take here
- # to make sure the difflib.unified_diff call works.
- #
- # In Python 2, the call to subprocess.check_output return
- # a 'str' type. In Python 3, however, the call returns a
- # 'bytes' type unless the 'encoding' argument is
- # specified. Unfortunately, the 'encoding' argument is not
- # in the Python 2 API. We could do an if/else here based
- # on the version of Python we are running, but it's more
- # straightforward to read the file in binary and do utf-8
- # conversion. In Python 2, it's just converting string
- # types to unicode types, whereas in Python 3 it's
- # converting bytes types to utf-8 encoded str types. This
- # approach ensures that the arguments to
- # difflib.unified_diff are acceptable string types in both
- # Python 2 and Python 3.
- with open(filename, "rb") as reader:
- original = reader.read().decode('utf8')
+ formatted = completed_processes[filename].stdout
+ if formatted != original:
+ # Run the equivalent of diff -u
+ diff = list(difflib.unified_diff(
+ original.decode('utf8').splitlines(True),
+ formatted.decode('utf8').splitlines(True),
+ fromfile=filename,
+ tofile="{} (after clang format)".format(
+ filename)))
+ else:
+ diff = None
- # Run clang-format and capture its output
- formatted = subprocess.check_output(
- [self.clang_format_binary,
- filename])
- formatted = formatted.decode('utf8')
- if formatted != original:
- # Run the equivalent of diff -u
- diff = list(difflib.unified_diff(
- original.splitlines(True),
- formatted.splitlines(True),
- fromfile=filename,
- tofile="{} (after clang format)".format(
- filename)))
- if diff:
- return filename, diff
+ return filename, diff
if __name__ == "__main__":
parser = argparse.ArgumentParser(
- description="Runs clang format on all of the source "
- "files. If --fix is specified, and compares the output "
- "with the existing file, outputting a unifiied diff if "
- "there are any necessary changes")
- parser.add_argument("clang_format_binary",
+ description="Runs clang-format on all of the source "
+ "files. If --fix is specified enforce format by "
+ "modifying in place, otherwise compare the output "
+ "with the existing file and output any necessary "
+ "changes as a patch in unified diff format")
+ parser.add_argument("--clang_format_binary",
+ required=True,
help="Path to the clang-format binary")
- parser.add_argument("exclude_globs",
+ parser.add_argument("--exclude_globs",
help="Filename containing globs for files "
"that should be excluded from the checks")
- parser.add_argument("source_dir",
+ parser.add_argument("--source_dir",
+ required=True,
help="Root directory of the source code")
parser.add_argument("--fix", default=False,
action="store_true",
@@ -93,47 +70,65 @@ if __name__ == "__main__":
parser.add_argument("--quiet", default=False,
action="store_true",
help="If specified, only print errors")
-
arguments = parser.parse_args()
+ exclude_globs = []
+ if arguments.exclude_globs:
+ for line in open(arguments.exclude_globs):
+ exclude_globs.append(line.strip())
+
formatted_filenames = []
- exclude_globs = [line.strip() for line in open(arguments.exclude_globs)]
- for directory, subdirs, filenames in os.walk(arguments.source_dir):
- fullpaths = (os.path.join(directory, filename)
- for filename in filenames)
- source_files = [x for x in fullpaths
- if x.endswith(".h") or
- x.endswith(".cc") or
- x.endswith(".cpp")]
- formatted_filenames.extend(
- # Filter out files that match the globs in the globs file
- [filename for filename in source_files
- if not any((fnmatch.fnmatch(filename, exclude_glob)
- for exclude_glob in exclude_globs))])
+ for path in lintutils.get_sources(arguments.source_dir, exclude_globs):
+ formatted_filenames.append(str(path))
- error = False
if arguments.fix:
if not arguments.quiet:
- # Print out each file on its own line, but run
- # clang format once for all of the files
print("\n".join(map(lambda x: "Formatting {}".format(x),
formatted_filenames)))
- subprocess.check_call([arguments.clang_format_binary,
- "-i"] + formatted_filenames)
+
+ # Break clang-format invocations into chunks: each invocation formats
+ # 16 files. Wait for all processes to complete
+ results = lintutils.run_parallel([
+ [arguments.clang_format_binary, "-i"] + some
+ for some in lintutils.chunk(formatted_filenames, 16)
+ ])
+ for result in results:
+ # if any clang-format reported a parse error, bubble it
+ result.check_returncode()
+
else:
- checker = FileChecker(arguments)
+ # run an instance of clang-format for each source file in parallel,
+ # then wait for all processes to complete
+ results = lintutils.run_parallel([
+ [arguments.clang_format_binary, filename]
+ for filename in formatted_filenames
+ ], stdout=PIPE, stderr=PIPE)
+ for result in results:
+ # if any clang-format reported a parse error, bubble it
+ result.check_returncode()
+
+ error = False
+ checker = partial(_check_one_file, {
+ filename: result
+ for filename, result in zip(formatted_filenames, results)
+ })
pool = mp.Pool()
try:
- for res in pool.imap(checker.run, formatted_filenames):
- if res is not None:
- filename, diff = res
+ # check the output from each invocation of clang-format in parallel
+ for filename, diff in pool.imap(checker, formatted_filenames):
+ if not arguments.quiet:
+ print("Checking {}".format(filename))
+ if diff:
print("{} had clang-format style issues".format(filename))
# Print out the diff to stderr
error = True
+ # pad with a newline
+ print(file=sys.stderr)
sys.stderr.writelines(diff)
+ except Exception:
+ error = True
+ raise
finally:
pool.terminate()
pool.join()
-
-
- sys.exit(1 if error else 0)
+ sys.exit(1 if error else 0)
diff --git a/cpp/build-support/run_clang_tidy.py
b/cpp/build-support/run_clang_tidy.py
new file mode 100755
index 0000000..8e1be5a
--- /dev/null
+++ b/cpp/build-support/run_clang_tidy.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import argparse
+import multiprocessing as mp
+import lintutils
+from subprocess import PIPE, DEVNULL
+import sys
+from functools import partial
+
+
+def _get_chunk_key(filenames):
+ # lists are not hashable so key on the first filename in a chunk
+ return filenames[0]
+
+
+# clang-tidy outputs complaints in '/path:line_number: complaint' format,
+# so we can scan its output to get a list of files to fix
+def _check_some_files(completed_processes, filenames):
+ result = completed_processes[_get_chunk_key(filenames)]
+ return lintutils.stdout_pathcolonline(result, filenames)
+
+
+def _check_all(cmd, all_filenames):
+ # Output from clang-tidy can be massive, so only run 16 instances
+ # of it at a time. Process and drop that output before running more
+ for filenames in lintutils.chunk(all_filenames, 16 ** 2):
+ # each clang-tidy instance will process 16 files
+ chunks = list(lintutils.chunk(filenames, 16))
+ cmds = [cmd + some for some in chunks]
+ results = lintutils.run_parallel(cmds, stderr=DEVNULL, stdout=PIPE)
+
+ error = False
+ # record completed processes (keyed by the first filename in the input
chunk)
+ # for lookup in _check_some_files
+ completed_processes = {
+ _get_chunk_key(some): result
+ for some, result in zip(chunks, results)
+ }
+ checker = partial(_check_some_files, completed_processes)
+ pool = mp.Pool()
+ try:
+ # check output of completed clang-tidy invocations in parallel
+ for problem_files, stdout in pool.imap(checker, chunks):
+ if problem_files:
+ msg = "clang-tidy suggested fixes for {}"
+ print("\n".join(map(msg.format, problem_files)))
+ error = True
+ except Exception:
+ error = True
+ raise
+ finally:
+ pool.terminate()
+ pool.join()
+
+ if error:
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description="Runs clang-tidy on all ")
+ parser.add_argument("--clang_tidy_binary",
+ required=True,
+ help="Path to the clang-tidy binary")
+ parser.add_argument("--exclude_globs",
+ help="Filename containing globs for files "
+ "that should be excluded from the checks")
+ parser.add_argument("--compile_commands",
+ required=True,
+ help="compile_commands.json to pass clang-tidy")
+ parser.add_argument("--source_dir",
+ required=True,
+ help="Root directory of the source code")
+ parser.add_argument("--fix", default=False,
+ action="store_true",
+ help="If specified, will attempt to fix the "
+ "source code instead of recommending fixes, "
+ "defaults to %(default)s")
+ parser.add_argument("--quiet", default=False,
+ action="store_true",
+ help="If specified, only print errors")
+ arguments = parser.parse_args()
+
+ linted_filenames = []
+ for path in lintutils.get_sources(arguments.source_dir):
+ linted_filenames.append(str(path))
+
+ if not arguments.quiet:
+ msg = 'Tidying {}' if arguments.fix else 'Checking {}'
+ print("\n".join(map(msg.format, linted_filenames)))
+
+ cmd = [
+ arguments.clang_tidy_binary,
+ '-p',
+ arguments.compile_commands
+ ]
+ if arguments.fix:
+ cmd.append('-fix')
+ results = lintutils.run_parallel(
+ [cmd + some for some in lintutils.chunk(linted_filenames, 16)])
+ for result in results:
+ result.check_returncode()
+
+ else:
+ _check_all(cmd, linted_filenames)
diff --git a/cpp/build-support/run_cpplint.py b/cpp/build-support/run_cpplint.py
new file mode 100755
index 0000000..83fdc96
--- /dev/null
+++ b/cpp/build-support/run_cpplint.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import lintutils
+from subprocess import PIPE, STDOUT
+import argparse
+import multiprocessing as mp
+import sys
+import platform
+from functools import partial
+
+
+_filters = '''
+-whitespace/comments
+-readability/todo
+-build/header_guard
+-build/c++11
+-runtime/references
+-build/include_order
+'''.split()
+
+
+def _get_chunk_key(filenames):
+ # lists are not hashable so key on the first filename in a chunk
+ return filenames[0]
+
+
+def _check_some_files(completed_processes, filenames):
+ # cpplint outputs complaints in '/path:line_number: complaint' format,
+ # so we can scan its output to get a list of files to fix
+ result = completed_processes[_get_chunk_key(filenames)]
+ return lintutils.stdout_pathcolonline(result, filenames)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description="Runs cpplint on all of the source files.")
+ parser.add_argument("--cpplint_binary",
+ required=True,
+ help="Path to the cpplint binary")
+ parser.add_argument("--exclude_globs",
+ help="Filename containing globs for files "
+ "that should be excluded from the checks")
+ parser.add_argument("--source_dir",
+ required=True,
+ help="Root directory of the source code")
+ parser.add_argument("--quiet", default=False,
+ action="store_true",
+ help="If specified, only print errors")
+ arguments = parser.parse_args()
+
+ exclude_globs = []
+ if arguments.exclude_globs:
+ for line in open(arguments.exclude_globs):
+ exclude_globs.append(line.strip())
+
+ linted_filenames = []
+ for path in lintutils.get_sources(arguments.source_dir, exclude_globs):
+ linted_filenames.append(str(path))
+
+ cmd = [
+ arguments.cpplint_binary,
+ '--verbose=2',
+ '--linelength=90',
+ '--filter=' + ','.join(_filters)
+ ]
+ if (arguments.cpplint_binary.endswith('.py') and
+ platform.system() == 'Windows'):
+ # Windows doesn't support executable scripts; execute with
+ # sys.executable
+ cmd.insert(0, sys.executable)
+ if arguments.quiet:
+ cmd.append('--quiet')
+ else:
+ print("\n".join(map(lambda x: "Linting {}".format(x),
+ linted_filenames)))
+
+ # lint files in chunks: each invocation of cpplint will process 16 files
+ chunks = list(lintutils.chunk(linted_filenames, 16))
+ cmds = [cmd + some for some in chunks]
+ results = lintutils.run_parallel(cmds, stdout=PIPE, stderr=STDOUT)
+
+ error = False
+ # record completed processes (keyed by the first filename in the input
+ # chunk) for lookup in _check_some_files
+ completed_processes = {
+ _get_chunk_key(filenames): result
+ for filenames, result in zip(chunks, results)
+ }
+ checker = partial(_check_some_files, completed_processes)
+ pool = mp.Pool()
+ try:
+ # scan the outputs of various cpplint invocations in parallel to
+ # distill a list of problematic files
+ for problem_files, stdout in pool.imap(checker, chunks):
+ if problem_files:
+ msg = "{} had cpplint issues"
+ print("\n".join(map(msg.format, problem_files)))
+ print(stdout, file=sys.stderr)
+ error = True
+ except Exception:
+ error = True
+ raise
+ finally:
+ pool.terminate()
+ pool.join()
+
+ sys.exit(1 if error else 0)
diff --git a/cpp/cmake_modules/FindClangTools.cmake
b/cpp/cmake_modules/FindClangTools.cmake
index 62ee8c3..55b425f 100644
--- a/cpp/cmake_modules/FindClangTools.cmake
+++ b/cpp/cmake_modules/FindClangTools.cmake
@@ -17,11 +17,15 @@
#
# find_package(ClangTools)
#
-# Variables used by this module, they can change the default behaviour and need
+# Variables used by this module which can change the default behaviour and need
# to be set before calling find_package:
#
+# CLANG_FORMAT_VERSION -
+# The version of clang-format to find. If this is not specified, clang-format
+# will not be searched for.
+#
# ClangTools_PATH -
-# When set, this path is inspected instead of standard library binary
locations
+# When set, this path is inspected in addition to standard library binary
locations
# to find clang-tidy and clang-format
#
# This module defines
@@ -45,6 +49,13 @@ else()
endif()
endif()
+set(CLANG_TOOLS_SEARCH_PATHS
+ ${ClangTools_PATH}
+ $ENV{CLANG_TOOLS_PATH}
+ /usr/local/bin /usr/bin
+ "C:/Program Files/LLVM/bin"
+ "${HOMEBREW_PREFIX}/bin")
+
find_program(CLANG_TIDY_BIN
NAMES clang-tidy-4.0
clang-tidy-3.9
@@ -52,26 +63,21 @@ find_program(CLANG_TIDY_BIN
clang-tidy-3.7
clang-tidy-3.6
clang-tidy
- PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin
"${HOMEBREW_PREFIX}/bin"
- NO_DEFAULT_PATH
+ PATHS ${CLANG_TOOLS_SEARCH_PATHS} NO_DEFAULT_PATH
)
if ( "${CLANG_TIDY_BIN}" STREQUAL "CLANG_TIDY_BIN-NOTFOUND" )
set(CLANG_TIDY_FOUND 0)
- message("clang-tidy not found")
+ message(STATUS "clang-tidy not found")
else()
set(CLANG_TIDY_FOUND 1)
- message("clang-tidy found at ${CLANG_TIDY_BIN}")
+ message(STATUS "clang-tidy found at ${CLANG_TIDY_BIN}")
endif()
if (CLANG_FORMAT_VERSION)
find_program(CLANG_FORMAT_BIN
NAMES clang-format-${CLANG_FORMAT_VERSION}
- PATHS
- ${ClangTools_PATH}
- $ENV{CLANG_TOOLS_PATH}
- /usr/local/bin /usr/bin "${HOMEBREW_PREFIX}/bin"
- NO_DEFAULT_PATH
+ PATHS ${CLANG_TOOLS_SEARCH_PATHS} NO_DEFAULT_PATH
)
# If not found yet, search alternative locations
@@ -107,11 +113,7 @@ if (CLANG_FORMAT_VERSION)
# try searching for "clang-format" and check the version
find_program(CLANG_FORMAT_BIN
NAMES clang-format
- PATHS
- ${ClangTools_PATH}
- $ENV{CLANG_TOOLS_PATH}
- /usr/local/bin /usr/bin
- NO_DEFAULT_PATH
+ PATHS ${CLANG_TOOLS_SEARCH_PATHS} NO_DEFAULT_PATH
)
if (NOT ("${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND"))
execute_process(COMMAND ${CLANG_FORMAT_BIN} "-version"
@@ -124,23 +126,12 @@ if (CLANG_FORMAT_VERSION)
endif()
endif()
-else()
- find_program(CLANG_FORMAT_BIN
- NAMES clang-format-4.0
- clang-format-3.9
- clang-format-3.8
- clang-format-3.7
- clang-format-3.6
- clang-format
- PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin
"${HOMEBREW_PREFIX}/bin"
- NO_DEFAULT_PATH
- )
endif()
if ( "${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND" )
set(CLANG_FORMAT_FOUND 0)
- message("clang-format not found")
+ message(STATUS "clang-format not found")
else()
set(CLANG_FORMAT_FOUND 1)
- message("clang-format found at ${CLANG_FORMAT_BIN}")
+ message(STATUS "clang-format found at ${CLANG_FORMAT_BIN}")
endif()
diff --git a/cpp/cmake_modules/FindInferTools.cmake
b/cpp/cmake_modules/FindInferTools.cmake
index 00c6709..e2d1020 100644
--- a/cpp/cmake_modules/FindInferTools.cmake
+++ b/cpp/cmake_modules/FindInferTools.cmake
@@ -38,8 +38,8 @@ find_program(INFER_BIN
if ( "${INFER_BIN}" STREQUAL "INFER_BIN-NOTFOUND" )
set(INFER_FOUND 0)
- message("infer not found")
+ message(STATUS "infer not found")
else()
set(INFER_FOUND 1)
- message("infer found at ${INFER_BIN}")
+ message(STATUS "infer found at ${INFER_BIN}")
endif()
diff --git a/dev/release/rat_exclude_files.txt
b/dev/release/rat_exclude_files.txt
index 552c3e1..4866ec2 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -16,7 +16,7 @@ cpp/src/arrow/status.h
cpp/src/arrow/vendored/*
cpp/build-support/asan_symbolize.py
cpp/build-support/cpplint.py
-cpp/build-support/clang_format_exclusions.txt
+cpp/build-support/lint_exclusions.txt
cpp/build-support/iwyu/*
cpp/cmake_modules/BuildUtils.cmake
cpp/cmake_modules/FindPythonLibsNew.cmake
diff --git a/r/lint.sh b/r/lint.sh
index 14e457d..8a9a2fb 100755
--- a/r/lint.sh
+++ b/r/lint.sh
@@ -23,6 +23,7 @@ CPP_BUILD_SUPPORT=$SOURCE_DIR/../cpp/build-support
LLVM_VERSION=6.0
CLANG_FORMAT=clang-format-$LLVM_VERSION
-$CPP_BUILD_SUPPORT/run_clang_format.py $CLANG_FORMAT \
-
$CPP_BUILD_SUPPORT/clang_format_exclusions.txt \
- $SOURCE_DIR/src --quiet $1
+$CPP_BUILD_SUPPORT/run_clang_format.py \
+ --clang_format_binary=$CLANG_FORMAT \
+ --exclude_glob=$CPP_BUILD_SUPPORT/lint_exclusions.txt \
+ --source_dir=$SOURCE_DIR/src --quiet $1