This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new a665b82  ARROW-4123: [C++] Enable linting tools to be run on Windows
a665b82 is described below

commit a665b829124ffa3850b30c6b2753088785471ac5
Author: Benjamin Kietzman <[email protected]>
AuthorDate: Sun Jan 20 10:22:00 2019 -0600

    ARROW-4123: [C++] Enable linting tools to be run on Windows
    
    * replaced lint and tidy targets with python scripts, ala 
run_clang_format.py
    * factored out some common functionality into a helper module (`lintutils`) 
for those scripts
    * add (windows) default LLVM install directory to FindClangTools' search 
path
    
    Author: Benjamin Kietzman <[email protected]>
    
    Closes #3374 from bkietz/ARROW-4123-improve-windows-linting and squashes 
the following commits:
    
    c0bec4ea8 <Benjamin Kietzman> Rework linting into Python scripts so they 
will work on Windows
---
 cpp/CMakeLists.txt                                 | 107 +++++++-------
 ...g_format_exclusions.txt => lint_exclusions.txt} |   0
 cpp/build-support/lintutils.py                     | 115 ++++++++++++++++
 cpp/build-support/run-clang-tidy.sh                |  45 ------
 cpp/build-support/run_clang_format.py              | 153 ++++++++++-----------
 cpp/build-support/run_clang_tidy.py                | 121 ++++++++++++++++
 cpp/build-support/run_cpplint.py                   | 123 +++++++++++++++++
 cpp/cmake_modules/FindClangTools.cmake             |  49 +++----
 cpp/cmake_modules/FindInferTools.cmake             |   4 +-
 dev/release/rat_exclude_files.txt                  |   2 +-
 r/lint.sh                                          |   7 +-
 11 files changed, 520 insertions(+), 206 deletions(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index f673b52..459cb3a 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -64,6 +64,12 @@ if(POLICY CMP0054)
   cmake_policy(SET CMP0054 NEW)
 endif()
 
+# don't ignore <PackageName>_ROOT variables in find_package
+if(POLICY CMP0074)
+  # https://cmake.org/cmake/help/v3.12/policy/CMP0074.html
+  cmake_policy(SET CMP0074 NEW)
+endif()
+
 set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support")
 
 set(CLANG_FORMAT_VERSION "6.0")
@@ -82,6 +88,7 @@ if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1" OR 
INFER_FOUND)
   # See http://clang.llvm.org/docs/JSONCompilationDatabase.html
   set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
 endif()
+
 # ----------------------------------------------------------------------
 # cmake options
 
@@ -356,6 +363,9 @@ that have not been built"
     OFF)
 endif()
 
+# Needed for linting targets, etc.
+find_package(PythonInterp)
+
 if (ARROW_USE_CCACHE)
   find_program(CCACHE_FOUND ccache)
   if(CCACHE_FOUND)
@@ -379,65 +389,68 @@ if (NOT ARROW_VERBOSE_LINT)
   set(ARROW_LINT_QUIET "--quiet")
 endif()
 
-if (UNIX)
+if (NOT LINT_EXCLUSIONS_FILE)
+  # source files matching a glob from a line in this file
+  # will be excluded from linting (cpplint, clang-tidy, clang-format)
+  set(LINT_EXCLUSIONS_FILE ${BUILD_SUPPORT_DIR}/lint_exclusions.txt)
+endif()
 
-  file(GLOB_RECURSE LINT_FILES
-    "${CMAKE_CURRENT_SOURCE_DIR}/src/*.h"
-    "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cc"
-    )
-
-  FOREACH(item ${LINT_FILES})
-    IF(NOT ((item MATCHES "_generated.h") OR
-            (item MATCHES "pyarrow_api.h") OR
-            (item MATCHES "pyarrow_lib.h") OR
-            (item MATCHES "config.h") OR
-            (item MATCHES "vendored/") OR
-            (item MATCHES "zmalloc.h") OR
-            (item MATCHES "ae.h")))
-      LIST(APPEND FILTERED_LINT_FILES ${item})
-    ENDIF()
-  ENDFOREACH(item ${LINT_FILES})
-
-  find_program(CPPLINT_BIN NAMES cpplint cpplint.py HINTS ${BUILD_SUPPORT_DIR})
-  message(STATUS "Found cpplint executable at ${CPPLINT_BIN}")
-
-  # Full lint
-  # Balancing act: cpplint.py takes a non-trivial time to launch,
-  # so process 12 files per invocation, while still ensuring parallelism
-  add_custom_target(lint echo ${FILTERED_LINT_FILES} | xargs -n12 -P8
-  ${CPPLINT_BIN}
-  --verbose=2 ${ARROW_LINT_QUIET}
-  --linelength=90
-  
--filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/c++11,-runtime/references,-build/include_order
-  )
-endif (UNIX)
+find_program(CPPLINT_BIN NAMES cpplint cpplint.py HINTS ${BUILD_SUPPORT_DIR})
+message(STATUS "Found cpplint executable at ${CPPLINT_BIN}")
+
+add_custom_target(lint
+  ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/run_cpplint.py
+  --cpplint_binary ${CPPLINT_BIN}
+  --exclude_globs ${LINT_EXCLUSIONS_FILE}
+  --source_dir ${CMAKE_CURRENT_SOURCE_DIR}/src
+  ${ARROW_LINT_QUIET})
 
 ############################################################
 # "make format" and "make check-format" targets
 ############################################################
-
-# runs clang format and updates files in place.
-add_custom_target(format ${BUILD_SUPPORT_DIR}/run_clang_format.py
-  ${CLANG_FORMAT_BIN}
-  ${BUILD_SUPPORT_DIR}/clang_format_exclusions.txt
-  ${CMAKE_CURRENT_SOURCE_DIR}/src --fix ${ARROW_LINT_QUIET})
-
-# runs clang format and exits with a non-zero exit code if any files need to 
be reformatted
-add_custom_target(check-format ${BUILD_SUPPORT_DIR}/run_clang_format.py
-   ${CLANG_FORMAT_BIN}
-   ${BUILD_SUPPORT_DIR}/clang_format_exclusions.txt
-   ${CMAKE_CURRENT_SOURCE_DIR}/src ${ARROW_LINT_QUIET})
+if (${CLANG_FORMAT_FOUND})
+  # runs clang format and updates files in place.
+  add_custom_target(format
+    ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/run_clang_format.py
+    --clang_format_binary ${CLANG_FORMAT_BIN}
+    --exclude_globs ${LINT_EXCLUSIONS_FILE}
+    --source_dir ${CMAKE_CURRENT_SOURCE_DIR}/src
+    --fix
+    ${ARROW_LINT_QUIET})
+
+  # runs clang format and exits with a non-zero exit code if any files need to 
be reformatted
+  add_custom_target(check-format
+    ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/run_clang_format.py
+    --clang_format_binary ${CLANG_FORMAT_BIN}
+    --exclude_globs ${LINT_EXCLUSIONS_FILE}
+    --source_dir ${CMAKE_CURRENT_SOURCE_DIR}/src
+    ${ARROW_LINT_QUIET})
+endif()
 
 ############################################################
 # "make clang-tidy" and "make check-clang-tidy" targets
 ############################################################
 if (${CLANG_TIDY_FOUND})
+  # TODO check to make sure .clang-tidy is being respected
+
   # runs clang-tidy and attempts to fix any warning automatically
-  add_custom_target(clang-tidy ${BUILD_SUPPORT_DIR}/run-clang-tidy.sh 
${CLANG_TIDY_BIN} ${CMAKE_BINARY_DIR}/compile_commands.json 1
-  `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc | sed -e '/_generated/g'`)
+  add_custom_target(clang-tidy
+    ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/run_clang_tidy.py
+    --clang_tidy_binary ${CLANG_TIDY_BIN}
+    --exclude_globs ${LINT_EXCLUSIONS_FILE}
+    --compile_commands ${CMAKE_BINARY_DIR}/compile_commands.json
+    --source_dir ${CMAKE_CURRENT_SOURCE_DIR}/src
+    --fix
+    ${ARROW_LINT_QUIET})
+
   # runs clang-tidy and exits with a non-zero exit code if any errors are 
found.
-  add_custom_target(check-clang-tidy ${BUILD_SUPPORT_DIR}/run-clang-tidy.sh 
${CLANG_TIDY_BIN} ${CMAKE_BINARY_DIR}/compile_commands.json
-  0 `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc |grep -v -F -f 
${CMAKE_CURRENT_SOURCE_DIR}/src/.clang-tidy-ignore | sed -e '/_generated/g'`)
+  add_custom_target(check-clang-tidy
+    ${PYTHON_EXECUTABLE} ${BUILD_SUPPORT_DIR}/run_clang_tidy.py
+    --clang_tidy_binary ${CLANG_TIDY_BIN}
+    --exclude_globs ${LINT_EXCLUSIONS_FILE}
+    --compile_commands ${CMAKE_BINARY_DIR}/compile_commands.json
+    --source_dir ${CMAKE_CURRENT_SOURCE_DIR}/src
+    ${ARROW_LINT_QUIET})
 endif()
 
 if (ARROW_ONLY_LINT)
diff --git a/cpp/build-support/clang_format_exclusions.txt 
b/cpp/build-support/lint_exclusions.txt
similarity index 100%
rename from cpp/build-support/clang_format_exclusions.txt
rename to cpp/build-support/lint_exclusions.txt
diff --git a/cpp/build-support/lintutils.py b/cpp/build-support/lintutils.py
new file mode 100644
index 0000000..1657cf2
--- /dev/null
+++ b/cpp/build-support/lintutils.py
@@ -0,0 +1,115 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+from fnmatch import fnmatch
+from pathlib import Path
+from subprocess import Popen, CompletedProcess
+from collections.abc import Mapping, Sequence
+
+
+def chunk(seq, n):
+    """
+    divide a sequence into equal sized chunks
+    (the last chunk may be smaller, but won't be empty)
+    """
+    some = []
+    for element in seq:
+        if len(some) == n:
+            yield some
+            some = []
+        else:
+            some.append(element)
+    if len(some) > 0:
+        yield some
+
+
+def dechunk(chunks):
+    "flatten chunks into a single list"
+    seq = []
+    for chunk in chunks:
+        seq.extend(chunk)
+    return seq
+
+
+def run_parallel(cmds, **kwargs):
+    """
+    run each of cmds (with shared **kwargs) using subprocess.Popen
+    then wait for all of them to complete
+    returns a list of each command's CompletedProcess
+    """
+    procs = []
+    for cmd in cmds:
+        if not isinstance(cmd, Sequence):
+            continue
+        procs.append(Popen(cmd, **kwargs))
+
+    for cmd in cmds:
+        if not isinstance(cmd, Mapping):
+            continue
+        cmd_kwargs = kwargs.copy()
+        cmd_kwargs.update(cmd)
+        procs.append(Popen(**cmd_kwargs))
+
+    complete = []
+    for proc in procs:
+        result = proc.communicate()
+        c = CompletedProcess(proc.args, proc.returncode)
+        c.stdout, c.stderr = result
+        complete.append(c)
+    return complete
+
+
+_source_extensions = '''
+.h
+.cc
+'''.split()
+
+
+def get_sources(source_dir, exclude_globs=[]):
+    sources = []
+    for directory, subdirs, basenames in os.walk(source_dir):
+        for path in [Path(directory) / basename for basename in basenames]:
+            # filter out non-source files
+            if path.suffix not in _source_extensions:
+                continue
+
+            # filter out files that match the globs in the globs file
+            if any([fnmatch(str(path), glob) for glob in exclude_globs]):
+                continue
+
+            sources.append(path.resolve())
+    return sources
+
+
+def stdout_pathcolonline(completed_process, filenames):
+    """
+    given a completed process which may have reported some files as problematic
+    by printing the path name followed by ':' then a line number, examine
+    stdout and return the set of actually reported file names
+    """
+    bfilenames = set()
+    for filename in filenames:
+        bfilenames.add(filename.encode('utf-8') + b':')
+    problem_files = set()
+    for line in completed_process.stdout.splitlines():
+        for filename in bfilenames:
+            if line.startswith(filename):
+                problem_files.add(filename.decode('utf-8'))
+                bfilenames.remove(filename)
+                break
+    return problem_files, completed_process.stdout
diff --git a/cpp/build-support/run-clang-tidy.sh 
b/cpp/build-support/run-clang-tidy.sh
deleted file mode 100755
index 75e9458..0000000
--- a/cpp/build-support/run-clang-tidy.sh
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#
-# Runs clang format in the given directory
-# Arguments:
-#   $1 - Path to the clang tidy binary
-#   $2 - Path to the compile_commands.json to use
-#   $3 - Apply fixes (will raise an error if false and not there where changes)
-#   $ARGN - Files to run clang-tidy on
-#
-CLANG_TIDY=$1
-shift
-COMPILE_COMMANDS=$1
-shift
-APPLY_FIXES=$1
-shift
-
-# clang format will only find its configuration if we are in
-# the source tree or in a path relative to the source tree
-if [ "$APPLY_FIXES" == "1" ]; then
-  $CLANG_TIDY -p $COMPILE_COMMANDS -fix  $@
-else
-  NUM_CORRECTIONS=`$CLANG_TIDY -p $COMPILE_COMMANDS $@ 2>&1 | grep -v Skipping 
| grep "warnings* generated" | wc -l`
-  if [ "$NUM_CORRECTIONS" -gt "0" ]; then
-    echo "clang-tidy had suggested fixes.  Please fix these!!!"
-    exit 1
-  fi
-fi
diff --git a/cpp/build-support/run_clang_format.py 
b/cpp/build-support/run_clang_format.py
index 24dcabb..8d1bb23 100755
--- a/cpp/build-support/run_clang_format.py
+++ b/cpp/build-support/run_clang_format.py
@@ -16,74 +16,51 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import lintutils
+from subprocess import PIPE
 import argparse
 import difflib
-import fnmatch
 import multiprocessing as mp
-import os
-import subprocess
 import sys
+from functools import partial
 
 
-class FileChecker(object):
+# examine the output of clang-format and if changes are
+# present assemble a (unified)patch of the difference
+def _check_one_file(completed_processes, filename):
+    with open(filename, "rb") as reader:
+        original = reader.read()
 
-    def __init__(self, arguments):
-        self.quiet = arguments.quiet
-        self.clang_format_binary = arguments.clang_format_binary
-
-    def run(self, filename):
-        if not self.quiet:
-            print("Checking {}".format(filename))
-        #
-        # Due to some incompatibilities between Python 2 and
-        # Python 3, there are some specific actions we take here
-        # to make sure the difflib.unified_diff call works.
-        #
-        # In Python 2, the call to subprocess.check_output return
-        # a 'str' type. In Python 3, however, the call returns a
-        # 'bytes' type unless the 'encoding' argument is
-        # specified. Unfortunately, the 'encoding' argument is not
-        # in the Python 2 API. We could do an if/else here based
-        # on the version of Python we are running, but it's more
-        # straightforward to read the file in binary and do utf-8
-        # conversion. In Python 2, it's just converting string
-        # types to unicode types, whereas in Python 3 it's
-        # converting bytes types to utf-8 encoded str types. This
-        # approach ensures that the arguments to
-        # difflib.unified_diff are acceptable string types in both
-        # Python 2 and Python 3.
-        with open(filename, "rb") as reader:
-            original = reader.read().decode('utf8')
+    formatted = completed_processes[filename].stdout
+    if formatted != original:
+        # Run the equivalent of diff -u
+        diff = list(difflib.unified_diff(
+            original.decode('utf8').splitlines(True),
+            formatted.decode('utf8').splitlines(True),
+            fromfile=filename,
+            tofile="{} (after clang format)".format(
+                filename)))
+    else:
+        diff = None
 
-        # Run clang-format and capture its output
-        formatted = subprocess.check_output(
-            [self.clang_format_binary,
-             filename])
-        formatted = formatted.decode('utf8')
-        if formatted != original:
-            # Run the equivalent of diff -u
-            diff = list(difflib.unified_diff(
-                original.splitlines(True),
-                formatted.splitlines(True),
-                fromfile=filename,
-                tofile="{} (after clang format)".format(
-                    filename)))
-            if diff:
-                return filename, diff
+    return filename, diff
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
-        description="Runs clang format on all of the source "
-        "files. If --fix is specified,  and compares the output "
-        "with the existing file, outputting a unifiied diff if "
-        "there are any necessary changes")
-    parser.add_argument("clang_format_binary",
+        description="Runs clang-format on all of the source "
+        "files. If --fix is specified enforce format by "
+        "modifying in place, otherwise compare the output "
+        "with the existing file and output any necessary "
+        "changes as a patch in unified diff format")
+    parser.add_argument("--clang_format_binary",
+                        required=True,
                         help="Path to the clang-format binary")
-    parser.add_argument("exclude_globs",
+    parser.add_argument("--exclude_globs",
                         help="Filename containing globs for files "
                         "that should be excluded from the checks")
-    parser.add_argument("source_dir",
+    parser.add_argument("--source_dir",
+                        required=True,
                         help="Root directory of the source code")
     parser.add_argument("--fix", default=False,
                         action="store_true",
@@ -93,47 +70,65 @@ if __name__ == "__main__":
     parser.add_argument("--quiet", default=False,
                         action="store_true",
                         help="If specified, only print errors")
-
     arguments = parser.parse_args()
 
+    exclude_globs = []
+    if arguments.exclude_globs:
+        for line in open(arguments.exclude_globs):
+            exclude_globs.append(line.strip())
+
     formatted_filenames = []
-    exclude_globs = [line.strip() for line in open(arguments.exclude_globs)]
-    for directory, subdirs, filenames in os.walk(arguments.source_dir):
-        fullpaths = (os.path.join(directory, filename)
-                     for filename in filenames)
-        source_files = [x for x in fullpaths
-                        if x.endswith(".h") or
-                        x.endswith(".cc") or
-                        x.endswith(".cpp")]
-        formatted_filenames.extend(
-            # Filter out files that match the globs in the globs file
-            [filename for filename in source_files
-             if not any((fnmatch.fnmatch(filename, exclude_glob)
-                         for exclude_glob in exclude_globs))])
+    for path in lintutils.get_sources(arguments.source_dir, exclude_globs):
+        formatted_filenames.append(str(path))
 
-    error = False
     if arguments.fix:
         if not arguments.quiet:
-            # Print out each file on its own line, but run
-            # clang format once for all of the files
             print("\n".join(map(lambda x: "Formatting {}".format(x),
                                 formatted_filenames)))
-        subprocess.check_call([arguments.clang_format_binary,
-                               "-i"] + formatted_filenames)
+
+        # Break clang-format invocations into chunks: each invocation formats
+        # 16 files. Wait for all processes to complete
+        results = lintutils.run_parallel([
+            [arguments.clang_format_binary, "-i"] + some
+            for some in lintutils.chunk(formatted_filenames, 16)
+        ])
+        for result in results:
+            # if any clang-format reported a parse error, bubble it
+            result.check_returncode()
+
     else:
-        checker = FileChecker(arguments)
+        # run an instance of clang-format for each source file in parallel,
+        # then wait for all processes to complete
+        results = lintutils.run_parallel([
+            [arguments.clang_format_binary, filename]
+            for filename in formatted_filenames
+        ], stdout=PIPE, stderr=PIPE)
+        for result in results:
+            # if any clang-format reported a parse error, bubble it
+            result.check_returncode()
+
+        error = False
+        checker = partial(_check_one_file, {
+            filename: result
+            for filename, result in zip(formatted_filenames, results)
+        })
         pool = mp.Pool()
         try:
-            for res in pool.imap(checker.run, formatted_filenames):
-                if res is not None:
-                    filename, diff = res
+            # check the output from each invocation of clang-format in parallel
+            for filename, diff in pool.imap(checker, formatted_filenames):
+                if not arguments.quiet:
+                    print("Checking {}".format(filename))
+                if diff:
                     print("{} had clang-format style issues".format(filename))
                     # Print out the diff to stderr
                     error = True
+                    # pad with a newline
+                    print(file=sys.stderr)
                     sys.stderr.writelines(diff)
+        except Exception:
+            error = True
+            raise
         finally:
             pool.terminate()
             pool.join()
-
-
-    sys.exit(1 if error else 0)
+        sys.exit(1 if error else 0)
diff --git a/cpp/build-support/run_clang_tidy.py 
b/cpp/build-support/run_clang_tidy.py
new file mode 100755
index 0000000..8e1be5a
--- /dev/null
+++ b/cpp/build-support/run_clang_tidy.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import argparse
+import multiprocessing as mp
+import lintutils
+from subprocess import PIPE, DEVNULL
+import sys
+from functools import partial
+
+
+def _get_chunk_key(filenames):
+    # lists are not hashable so key on the first filename in a chunk
+    return filenames[0]
+
+
+# clang-tidy outputs complaints in '/path:line_number: complaint' format,
+# so we can scan its output to get a list of files to fix
+def _check_some_files(completed_processes, filenames):
+    result = completed_processes[_get_chunk_key(filenames)]
+    return lintutils.stdout_pathcolonline(result, filenames)
+
+
+def _check_all(cmd, all_filenames):
+    # Output from clang-tidy can be massive, so only run 16 instances
+    # of it at a time. Process and drop that output before running more
+    for filenames in lintutils.chunk(all_filenames, 16 ** 2):
+        # each clang-tidy instance will process 16 files
+        chunks = list(lintutils.chunk(filenames, 16))
+        cmds = [cmd + some for some in chunks]
+        results = lintutils.run_parallel(cmds, stderr=DEVNULL, stdout=PIPE)
+
+        error = False
+        # record completed processes (keyed by the first filename in the input 
chunk)
+        # for lookup in _check_some_files
+        completed_processes = {
+            _get_chunk_key(some): result
+            for some, result in zip(chunks, results)
+        }
+        checker = partial(_check_some_files, completed_processes)
+        pool = mp.Pool()
+        try:
+            # check output of completed clang-tidy invocations in parallel
+            for problem_files, stdout in pool.imap(checker, chunks):
+                if problem_files:
+                    msg = "clang-tidy suggested fixes for {}"
+                    print("\n".join(map(msg.format, problem_files)))
+                    error = True
+        except Exception:
+            error = True
+            raise
+        finally:
+            pool.terminate()
+            pool.join()
+
+        if error:
+            sys.exit(1)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Runs clang-tidy on all ")
+    parser.add_argument("--clang_tidy_binary",
+                        required=True,
+                        help="Path to the clang-tidy binary")
+    parser.add_argument("--exclude_globs",
+                        help="Filename containing globs for files "
+                        "that should be excluded from the checks")
+    parser.add_argument("--compile_commands",
+                        required=True,
+                        help="compile_commands.json to pass clang-tidy")
+    parser.add_argument("--source_dir",
+                        required=True,
+                        help="Root directory of the source code")
+    parser.add_argument("--fix", default=False,
+                        action="store_true",
+                        help="If specified, will attempt to fix the "
+                        "source code instead of recommending fixes, "
+                        "defaults to %(default)s")
+    parser.add_argument("--quiet", default=False,
+                        action="store_true",
+                        help="If specified, only print errors")
+    arguments = parser.parse_args()
+
+    linted_filenames = []
+    for path in lintutils.get_sources(arguments.source_dir):
+        linted_filenames.append(str(path))
+
+    if not arguments.quiet:
+        msg = 'Tidying {}' if arguments.fix else 'Checking {}'
+        print("\n".join(map(msg.format, linted_filenames)))
+
+    cmd = [
+        arguments.clang_tidy_binary,
+        '-p',
+        arguments.compile_commands
+    ]
+    if arguments.fix:
+        cmd.append('-fix')
+        results = lintutils.run_parallel(
+            [cmd + some for some in lintutils.chunk(linted_filenames, 16)])
+        for result in results:
+            result.check_returncode()
+
+    else:
+        _check_all(cmd, linted_filenames)
diff --git a/cpp/build-support/run_cpplint.py b/cpp/build-support/run_cpplint.py
new file mode 100755
index 0000000..83fdc96
--- /dev/null
+++ b/cpp/build-support/run_cpplint.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import lintutils
+from subprocess import PIPE, STDOUT
+import argparse
+import multiprocessing as mp
+import sys
+import platform
+from functools import partial
+
+
+_filters = '''
+-whitespace/comments
+-readability/todo
+-build/header_guard
+-build/c++11
+-runtime/references
+-build/include_order
+'''.split()
+
+
+def _get_chunk_key(filenames):
+    # lists are not hashable so key on the first filename in a chunk
+    return filenames[0]
+
+
+def _check_some_files(completed_processes, filenames):
+    # cpplint outputs complaints in '/path:line_number: complaint' format,
+    # so we can scan its output to get a list of files to fix
+    result = completed_processes[_get_chunk_key(filenames)]
+    return lintutils.stdout_pathcolonline(result, filenames)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Runs cpplint on all of the source files.")
+    parser.add_argument("--cpplint_binary",
+                        required=True,
+                        help="Path to the cpplint binary")
+    parser.add_argument("--exclude_globs",
+                        help="Filename containing globs for files "
+                        "that should be excluded from the checks")
+    parser.add_argument("--source_dir",
+                        required=True,
+                        help="Root directory of the source code")
+    parser.add_argument("--quiet", default=False,
+                        action="store_true",
+                        help="If specified, only print errors")
+    arguments = parser.parse_args()
+
+    exclude_globs = []
+    if arguments.exclude_globs:
+        for line in open(arguments.exclude_globs):
+            exclude_globs.append(line.strip())
+
+    linted_filenames = []
+    for path in lintutils.get_sources(arguments.source_dir, exclude_globs):
+        linted_filenames.append(str(path))
+
+    cmd = [
+        arguments.cpplint_binary,
+        '--verbose=2',
+        '--linelength=90',
+        '--filter=' + ','.join(_filters)
+    ]
+    if (arguments.cpplint_binary.endswith('.py') and
+            platform.system() == 'Windows'):
+        # Windows doesn't support executable scripts; execute with
+        # sys.executable
+        cmd.insert(0, sys.executable)
+    if arguments.quiet:
+        cmd.append('--quiet')
+    else:
+        print("\n".join(map(lambda x: "Linting {}".format(x),
+                            linted_filenames)))
+
+    # lint files in chunks: each invocation of cpplint will process 16 files
+    chunks = list(lintutils.chunk(linted_filenames, 16))
+    cmds = [cmd + some for some in chunks]
+    results = lintutils.run_parallel(cmds, stdout=PIPE, stderr=STDOUT)
+
+    error = False
+    # record completed processes (keyed by the first filename in the input
+    # chunk) for lookup in _check_some_files
+    completed_processes = {
+        _get_chunk_key(filenames): result
+        for filenames, result in zip(chunks, results)
+    }
+    checker = partial(_check_some_files, completed_processes)
+    pool = mp.Pool()
+    try:
+        # scan the outputs of various cpplint invocations in parallel to
+        # distill a list of problematic files
+        for problem_files, stdout in pool.imap(checker, chunks):
+            if problem_files:
+                msg = "{} had cpplint issues"
+                print("\n".join(map(msg.format, problem_files)))
+                print(stdout, file=sys.stderr)
+                error = True
+    except Exception:
+        error = True
+        raise
+    finally:
+        pool.terminate()
+        pool.join()
+
+    sys.exit(1 if error else 0)
diff --git a/cpp/cmake_modules/FindClangTools.cmake 
b/cpp/cmake_modules/FindClangTools.cmake
index 62ee8c3..55b425f 100644
--- a/cpp/cmake_modules/FindClangTools.cmake
+++ b/cpp/cmake_modules/FindClangTools.cmake
@@ -17,11 +17,15 @@
 #
 #  find_package(ClangTools)
 #
-# Variables used by this module, they can change the default behaviour and need
+# Variables used by this module which can change the default behaviour and need
 # to be set before calling find_package:
 #
+#  CLANG_FORMAT_VERSION -
+#   The version of clang-format to find. If this is not specified, clang-format
+#   will not be searched for.
+#
 #  ClangTools_PATH -
-#   When set, this path is inspected instead of standard library binary 
locations
+#   When set, this path is inspected in addition to standard library binary 
locations
 #   to find clang-tidy and clang-format
 #
 # This module defines
@@ -45,6 +49,13 @@ else()
   endif()
 endif()
 
+set(CLANG_TOOLS_SEARCH_PATHS
+  ${ClangTools_PATH}
+  $ENV{CLANG_TOOLS_PATH}
+  /usr/local/bin /usr/bin
+  "C:/Program Files/LLVM/bin"
+  "${HOMEBREW_PREFIX}/bin")
+
 find_program(CLANG_TIDY_BIN
   NAMES clang-tidy-4.0
   clang-tidy-3.9
@@ -52,26 +63,21 @@ find_program(CLANG_TIDY_BIN
   clang-tidy-3.7
   clang-tidy-3.6
   clang-tidy
-  PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin 
"${HOMEBREW_PREFIX}/bin"
-        NO_DEFAULT_PATH
+  PATHS ${CLANG_TOOLS_SEARCH_PATHS} NO_DEFAULT_PATH
 )
 
 if ( "${CLANG_TIDY_BIN}" STREQUAL "CLANG_TIDY_BIN-NOTFOUND" )
   set(CLANG_TIDY_FOUND 0)
-  message("clang-tidy not found")
+  message(STATUS "clang-tidy not found")
 else()
   set(CLANG_TIDY_FOUND 1)
-  message("clang-tidy found at ${CLANG_TIDY_BIN}")
+  message(STATUS "clang-tidy found at ${CLANG_TIDY_BIN}")
 endif()
 
 if (CLANG_FORMAT_VERSION)
     find_program(CLANG_FORMAT_BIN
       NAMES clang-format-${CLANG_FORMAT_VERSION}
-      PATHS
-            ${ClangTools_PATH}
-            $ENV{CLANG_TOOLS_PATH}
-            /usr/local/bin /usr/bin "${HOMEBREW_PREFIX}/bin"
-            NO_DEFAULT_PATH
+      PATHS ${CLANG_TOOLS_SEARCH_PATHS} NO_DEFAULT_PATH
     )
 
     # If not found yet, search alternative locations
@@ -107,11 +113,7 @@ if (CLANG_FORMAT_VERSION)
         # try searching for "clang-format" and check the version
         find_program(CLANG_FORMAT_BIN
           NAMES clang-format
-          PATHS
-                ${ClangTools_PATH}
-                $ENV{CLANG_TOOLS_PATH}
-                /usr/local/bin /usr/bin
-                NO_DEFAULT_PATH
+          PATHS ${CLANG_TOOLS_SEARCH_PATHS} NO_DEFAULT_PATH
         )
         if (NOT ("${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND"))
           execute_process(COMMAND ${CLANG_FORMAT_BIN} "-version"
@@ -124,23 +126,12 @@ if (CLANG_FORMAT_VERSION)
       endif()
     endif()
 
-else()
-    find_program(CLANG_FORMAT_BIN
-      NAMES clang-format-4.0
-      clang-format-3.9
-      clang-format-3.8
-      clang-format-3.7
-      clang-format-3.6
-      clang-format
-      PATHS ${ClangTools_PATH} $ENV{CLANG_TOOLS_PATH} /usr/local/bin /usr/bin 
"${HOMEBREW_PREFIX}/bin"
-            NO_DEFAULT_PATH
-    )
 endif()
 
 if ( "${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND" )
   set(CLANG_FORMAT_FOUND 0)
-  message("clang-format not found")
+  message(STATUS "clang-format not found")
 else()
   set(CLANG_FORMAT_FOUND 1)
-  message("clang-format found at ${CLANG_FORMAT_BIN}")
+  message(STATUS "clang-format found at ${CLANG_FORMAT_BIN}")
 endif()
diff --git a/cpp/cmake_modules/FindInferTools.cmake 
b/cpp/cmake_modules/FindInferTools.cmake
index 00c6709..e2d1020 100644
--- a/cpp/cmake_modules/FindInferTools.cmake
+++ b/cpp/cmake_modules/FindInferTools.cmake
@@ -38,8 +38,8 @@ find_program(INFER_BIN
 
 if ( "${INFER_BIN}" STREQUAL "INFER_BIN-NOTFOUND" )
   set(INFER_FOUND 0)
-  message("infer not found")
+  message(STATUS "infer not found")
 else()
   set(INFER_FOUND 1)
-  message("infer found at ${INFER_BIN}")
+  message(STATUS "infer found at ${INFER_BIN}")
 endif()
diff --git a/dev/release/rat_exclude_files.txt 
b/dev/release/rat_exclude_files.txt
index 552c3e1..4866ec2 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -16,7 +16,7 @@ cpp/src/arrow/status.h
 cpp/src/arrow/vendored/*
 cpp/build-support/asan_symbolize.py
 cpp/build-support/cpplint.py
-cpp/build-support/clang_format_exclusions.txt
+cpp/build-support/lint_exclusions.txt
 cpp/build-support/iwyu/*
 cpp/cmake_modules/BuildUtils.cmake
 cpp/cmake_modules/FindPythonLibsNew.cmake
diff --git a/r/lint.sh b/r/lint.sh
index 14e457d..8a9a2fb 100755
--- a/r/lint.sh
+++ b/r/lint.sh
@@ -23,6 +23,7 @@ CPP_BUILD_SUPPORT=$SOURCE_DIR/../cpp/build-support
 LLVM_VERSION=6.0
 CLANG_FORMAT=clang-format-$LLVM_VERSION
 
-$CPP_BUILD_SUPPORT/run_clang_format.py $CLANG_FORMAT \
-                                       
$CPP_BUILD_SUPPORT/clang_format_exclusions.txt \
-                                       $SOURCE_DIR/src --quiet $1
+$CPP_BUILD_SUPPORT/run_clang_format.py \
+    --clang_format_binary=$CLANG_FORMAT \
+    --exclude_glob=$CPP_BUILD_SUPPORT/lint_exclusions.txt \
+    --source_dir=$SOURCE_DIR/src --quiet $1

Reply via email to