JonasToth updated this revision to Diff 175459.
JonasToth added a comment.

- after countless attempts of fixing the unicode problem, it is finally done.
- Remove all unnecessary whitespace
- remove the `xxx warnings generated.` as well

This setup now runs in my BB and is a good approximation (for me) how the
deduplication should work in the future.

Still just for reference and documentation.


Repository:
  rCTE Clang Tools Extra

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D54141/new/

https://reviews.llvm.org/D54141

Files:
  clang-tidy/tool/run-clang-tidy.py
  clang-tidy/tool/run_clang_tidy.py
  clang-tidy/tool/test_input/out_csa_cmake.log
  clang-tidy/tool/test_input/out_performance_cmake.log
  clang-tidy/tool/test_log_parser.py
  docs/ReleaseNotes.rst

Index: docs/ReleaseNotes.rst
===================================================================
--- docs/ReleaseNotes.rst
+++ docs/ReleaseNotes.rst
@@ -225,6 +225,9 @@
   - floating point - integer narrowing conversions,
   - constants with narrowing conversions (even in ternary operator).
 
+- `run-clang-tidy.py` support deduplication of `clang-tidy` diagnostics
+  to reduce the amount of output with the optional `-deduplicate` flag.
+
 Improvements to include-fixer
 -----------------------------
 
Index: clang-tidy/tool/test_log_parser.py
===================================================================
--- /dev/null
+++ clang-tidy/tool/test_log_parser.py
@@ -0,0 +1,236 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import unittest
+from run_clang_tidy import Diagnostic, Deduplication
+from run_clang_tidy import ParseClangTidyDiagnostics, _is_valid_diag_match
+
+
+class TestDiagnostics(unittest.TestCase):
+    """Test fingerprinting diagnostic messages"""
+
+    def test_construction(self):
+        d = Diagnostic("/home/user/project/my_file.h", 24, 4,
+                       "warning: Do not do this thing [warning-category]")
+        self.assertIsNotNone(d)
+        self.assertEqual(str(d),
+                         "/home/user/project/my_file.h:24:4: warning: Do not do this thing [warning-category]")
+
+        d.add_additional_line("      MyCodePiece();")
+        d.add_additional_line("      ^")
+
+        self.assertEqual(str(d),
+                         "/home/user/project/my_file.h:24:4: warning: Do not do this thing [warning-category]"
+                         "\n      MyCodePiece();"
+                         "\n      ^")
+
+
+class TestDeduplication(unittest.TestCase):
+    """Test the `DiagEssence` based deduplication of diagnostic messages."""
+
+    def test_construction(self):
+        self.assertIsNotNone(Deduplication())
+
+    def test_dedup(self):
+        dedup = Deduplication()
+        d = Diagnostic("/home/user/project/my_file.h", 24, 4,
+                       "warning: Do not do this thing [warning-category]")
+        self.assertTrue(dedup.insert_and_query(d))
+        self.assertFalse(dedup.insert_and_query(d))
+
+        d2 = Diagnostic("/home/user/project/my_file.h", 24, 4,
+                        "warning: Do not do this thing [warning-category]")
+        d2.add_additional_line("      MyCodePiece();")
+        d2.add_additional_line("      ^")
+        self.assertTrue(dedup.insert_and_query(d2))
+        self.assertFalse(dedup.insert_and_query(d2))
+
+        d3 = Diagnostic("/home/user/project/my_file.h", 24, 4,
+                        "warning: Do not do this thing [warning-category]")
+        self.assertFalse(dedup.insert_and_query(d3))
+
+class TestLinewiseParsing(unittest.TestCase):
+    def test_construction(self):
+        self.assertIsNotNone(ParseClangTidyDiagnostics())
+
+    def test_valid_diags_regex(self):
+        pp = ParseClangTidyDiagnostics()
+
+        warning = "/home/user/project/my_file.h:123:1: warning: don't do it [no]"
+        m = pp._diag_re.match(warning)
+        self.assertTrue(m)
+        self.assertTrue(_is_valid_diag_match(m.groups()))
+
+        error = "/home/user/project/my_file.h:1:110: error: wrong! [not-ok]"
+        m = pp._diag_re.match(error)
+        self.assertTrue(m)
+        self.assertTrue(_is_valid_diag_match(m.groups()))
+
+        hybrid = "/home/user/project/boo.cpp:30:42: error: wrong! [not-ok,bad]"
+        m = pp._diag_re.match(hybrid)
+        self.assertTrue(m)
+        self.assertTrue(_is_valid_diag_match(m.groups()))
+
+        note = "/home/user/project/my_file.h:1:110: note: alksdj"
+        m = pp._diag_re.match(note)
+        self.assertFalse(m)
+
+        garbage = "not a path:not_a_number:110: gibberish"
+        m = pp._diag_re.match(garbage)
+        self.assertFalse(m)
+
+    def test_single_diagnostics(self):
+        pp = ParseClangTidyDiagnostics()
+        example_warning = [
+            "/project/git/Source/kwsys/Terminal.c:53:21: warning: use of a signed integer operand with a binary bitwise operator [hicpp-signed-bitwise]",
+        ]
+        pp._parse_lines(example_warning)
+        self.assertEqual(
+            str(pp.get_diags()[0]),
+            "/project/git/Source/kwsys/Terminal.c:53:21: warning: use of a signed integer operand with a binary bitwise operator [hicpp-signed-bitwise]"
+        )
+
+    def test_no_diag(self):
+        pp = ParseClangTidyDiagnostics()
+        garbage_lines = \
+"""
+    hicpp-no-array-decay
+    hicpp-no-assembler
+    hicpp-no-malloc
+    hicpp-noexcept-move
+    hicpp-signed-bitwise
+    hicpp-special-member-functions
+    hicpp-static-assert
+    hicpp-undelegated-constructor
+    hicpp-use-auto
+    hicpp-use-emplace
+    hicpp-use-equals-default
+    hicpp-use-equals-delete
+    hicpp-use-noexcept
+    hicpp-use-nullptr
+    hicpp-use-override
+    hicpp-vararg
+
+clang-apply-replacements version 8.0.0
+18 warnings generated.
+36 warnings generated.
+Suppressed 26 warnings (26 in non-user code).
+Use -header-filter=.* to display errors from all non-system headers. Use -system-headers to display errors from system headers as well.
+
+61 warnings generated.
+122 warnings generated.
+Suppressed 122 warnings (122 in non-user code).
+Use -header-filter=.* to display errors from all non-system headers. Use -system-headers to display errors from system headers as well.
+
+clang-tidy -header-filter=^/project/git/.* -checks=-*,hicpp-* -export-fixes /tmp/tmpH8MVt0/tmpErKPl_.yaml -p=/project/git /project/git/Source/kwsys/Terminal.c
+"""
+        pp.parse_string(garbage_lines)
+        self.assertEqual(len(pp.get_diags()), 0)
+
+    def test_deduplicate_basic_multi_line_warning(self):
+        pp = ParseClangTidyDiagnostics()
+        example_warning = [
+            "/project/git/Source/kwsys/Terminal.c:53:21: warning: use of a signed integer operand with a binary bitwise operator [hicpp-signed-bitwise]",
+            "int default_tty = color & kwsysTerminal_Color_AssumeTTY;",
+            "                    ^",
+        ]
+
+        pp._parse_lines(example_warning + example_warning)
+        diags = pp.get_diags()
+
+        self.assertEqual(len(diags), 1)
+        self.assertEqual(
+            str(diags[0]),
+            "/project/git/Source/kwsys/Terminal.c:53:21: warning: use of a signed integer operand with a binary bitwise operator [hicpp-signed-bitwise]"
+            "\nint default_tty = color & kwsysTerminal_Color_AssumeTTY;"
+            "\n                    ^")
+
+    def test_real_diags(self):
+        pp = ParseClangTidyDiagnostics()
+        excerpt = \
+"""/project/git/Source/kwsys/Base64.c:54:35: warning: use of a signed integer operand with a binary bitwise operator [hicpp-signed-bitwise]
+  dest[0] = kwsysBase64EncodeChar((src[0] >> 2) & 0x3F);
+                                  ^
+/project/git/Source/kwsys/Base64.c:54:36: warning: use of a signed integer operand with a binary bitwise operator [hicpp-signed-bitwise]
+  dest[0] = kwsysBase64EncodeChar((src[0] >> 2) & 0x3F);
+                                   ^
+/project/git/Source/kwsys/Base64.c:56:27: warning: use of a signed integer operand with a binary bitwise operator [hicpp-signed-bitwise]
+    kwsysBase64EncodeChar(((src[0] << 4) & 0x30) | ((src[1] >> 4) & 0x0F));
+                          ^
+/project/git/Source/kwsys/Base64.c:56:28: warning: use of a signed integer operand with a binary bitwise operator [hicpp-signed-bitwise]
+    kwsysBase64EncodeChar(((src[0] << 4) & 0x30) | ((src[1] >> 4) & 0x0F));
+                           ^
+/project/git/Source/kwsys/Base64.c:54:35: warning: use of a signed integer operand with a binary bitwise operator [hicpp-signed-bitwise]
+  dest[0] = kwsysBase64EncodeChar((src[0] >> 2) & 0x3F);
+                                  ^
+/project/git/Source/kwsys/Base64.c:54:36: warning: use of a signed integer operand with a binary bitwise operator [hicpp-signed-bitwise]
+  dest[0] = kwsysBase64EncodeChar((src[0] >> 2) & 0x3F);
+                                   ^
+/project/git/Source/kwsys/Base64.c:56:27: warning: use of a signed integer operand with a binary bitwise operator [hicpp-signed-bitwise]
+    kwsysBase64EncodeChar(((src[0] << 4) & 0x30) | ((src[1] >> 4) & 0x0F));
+                          ^
+/project/git/Source/kwsys/Base64.c:56:28: warning: use of a signed integer operand with a binary bitwise operator [hicpp-signed-bitwise]
+    kwsysBase64EncodeChar(((src[0] << 4) & 0x30) | ((src[1] >> 4) & 0x0F));
+                           ^
+/project/git/Source/kwsys/testCommandLineArguments.cxx:16:10: warning: inclusion of deprecated C++ header 'string.h'; consider using 'cstring' instead [hicpp-deprecated-headers]
+#include <string.h> /* strcmp */
+         ^~~~~~~~~~
+         <cstring>
+/project/git/Source/kwsys/testFStream.cxx:10:10: warning: inclusion of deprecated C++ header 'string.h'; consider using 'cstring' instead [hicpp-deprecated-headers]
+#include <string.h>
+         ^~~~~~~~~~
+         <cstring>
+/project/git/Source/kwsys/testFStream.cxx:77:47: warning: do not implicitly decay an array into a pointer; consider using gsl::array_view or an explicit cast instead [hicpp-no-array-decay]
+      out.write(reinterpret_cast<const char*>(expected_bom_data[i] + 1),
+                                              ^
+/project/git/Source/kwsys/testFStream.cxx:78:18: warning: do not implicitly decay an array into a pointer; consider using gsl::array_view or an explicit cast instead [hicpp-no-array-decay]
+                *expected_bom_data[i]);
+                 ^
+/project/git/Source/kwsys/testFStream.cxx:109:3: warning: use of a signed integer operand with a binary bitwise operator [hicpp-signed-bitwise]
+  ret |= testNoFile();
+  ^
+/project/git/Source/kwsys/testFStream.cxx:110:3: warning: use of a signed integer operand with a binary bitwise operator [hicpp-signed-bitwise]
+  ret |= testBOM();
+  ^
+/project/git/Source/kwsys/testSystemInformation.cxx:83:36: warning: use of a signed integer operand with a binary bitwise operator [hicpp-signed-bitwise]
+    if (info.DoesCPUSupportFeature(static_cast<long int>(1) << i)) {
+                                   ^"""
+        pp.parse_string(excerpt)
+        self.assertEqual(len(pp.get_diags()), 11)
+
+        self.maxDiff = None
+        generated_diag = "\n".join(str(diag) for diag in pp.get_diags())
+        # It is not identical because of deduplication.
+        self.assertNotEqual(generated_diag, excerpt)
+
+        # The first 11 lines are duplicated diags but the rest is identical.
+        self.assertEqual(generated_diag,
+                         "\n".join(excerpt.splitlines()[12:]))
+
+        # Pretend that the next clang-tidy invocation returns its data
+        # and the parser shall deduplicate this one as well. This time
+        # no new data is expected.
+        pp.reset_parser()
+        pp.parse_string(excerpt)
+        self.assertEqual(len(pp.get_diags()), 0)
+
+    def test_log_files(self):
+        pp = ParseClangTidyDiagnostics()
+        pp._parse_file("test_input/out_csa_cmake.log")
+        self.assertEqual(len(pp.get_diags()), 5)
+
+        pp.reset_parser()
+        pp._parse_file("test_input/out_csa_cmake.log")
+        self.assertEqual(len(pp.get_diags()), 0)
+
+        pp.reset_parser()
+        pp._parse_file("test_input/out_performance_cmake.log")
+        self.assertEqual(len(pp.get_diags()), 24)
+
+        pp.reset_parser()
+        pp._parse_file("test_input/out_performance_cmake.log")
+        self.assertEqual(len(pp.get_diags()), 0)
+
+
+if __name__ == "__main__":
+    unittest.main()
Index: clang-tidy/tool/test_input/out_performance_cmake.log
===================================================================
--- /dev/null
+++ clang-tidy/tool/test_input/out_performance_cmake.log
@@ -0,0 +1,93 @@
+/project/git/Source/kwsys/Glob.cxx:200:28: warning: string concatenation results in allocation of unnecessary temporary strings; consider using 'operator+=' or 'string::append()' instead [performance-inefficient-string-concatenation]
+      realname = dir + "/" + fname;
+                           ^
+/project/git/Source/kwsys/Glob.cxx:223:47: warning: string concatenation results in allocation of unnecessary temporary strings; consider using 'operator+=' or 'string::append()' instead [performance-inefficient-string-concatenation]
+                        "' failed! Reason: '" + realPathErrorMessage + "'"));
+                                              ^
+/project/git/Source/kwsys/Glob.cxx:253:42: warning: string concatenation results in allocation of unnecessary temporary strings; consider using 'operator+=' or 'string::append()' instead [performance-inefficient-string-concatenation]
+          message += canonicalPath + "/" + fname;
+                                         ^
+/project/git/Source/kwsys/Glob.cxx:305:28: warning: string concatenation results in allocation of unnecessary temporary strings; consider using 'operator+=' or 'string::append()' instead [performance-inefficient-string-concatenation]
+      realname = dir + "/" + fname;
+                           ^
+/project/git/Source/kwsys/SystemTools.cxx:1993:25: warning: 'find_first_of' called with a string literal consisting of a single character; consider using the more effective overload accepting a character [performance-faster-string-find]
+  if (ret.find_first_of(" ") != std::string::npos) {
+                        ^~~
+                        ' '
+/project/git/Source/kwsys/SystemTools.cxx:2068:17: warning: local copy 'source_name' of the variable 'source' is never modified; consider avoiding the copy [performance-unnecessary-copy-initialization]
+    std::string source_name = source;
+                ^
+    const      &
+/project/git/Source/kwsys/SystemTools.cxx:2212:19: warning: local copy 'source_name' of the variable 'source' is never modified; consider avoiding the copy [performance-unnecessary-copy-initialization]
+      std::string source_name = source;
+                  ^
+      const      &
+/project/git/Source/kwsys/SystemTools.cxx:3050:49: warning: 'rfind' called with a string literal consisting of a single character; consider using the more effective overload accepting a character [performance-faster-string-find]
+    std::string::size_type slashPos = dir.rfind("/");
+                                                ^~~
+                                                '/'
+/project/git/Source/kwsys/SystemTools.cxx:3207:32: warning: std::move of the const expression has no effect; remove std::move() [performance-move-const-arg]
+      out_components.push_back(std::move(*i));
+                               ^~~~~~~~~~  ~
+/project/git/Source/kwsys/SystemTools.cxx:3638:15: warning: local copy 'data' of the variable 'str' is never modified; consider avoiding the copy [performance-unnecessary-copy-initialization]
+  std::string data(str);
+              ^
+  const      &
+/project/git/Source/kwsys/SystemTools.cxx:3688:47: warning: 'rfind' called with a string literal consisting of a single character; consider using the more effective overload accepting a character [performance-faster-string-find]
+  std::string::size_type slash_pos = fn.rfind("/");
+                                              ^~~
+                                              '/'
+/project/git/Source/kwsys/SystemInformation.cxx:1340:28: warning: 'rfind' called with a string literal consisting of a single character; consider using the more effective overload accepting a character [performance-faster-string-find]
+    size_t at = file.rfind("/");
+                           ^~~
+                           '/'
+/project/git/Source/kwsys/SystemInformation.cxx:3354:23: warning: 'find' called with a string literal consisting of a single character; consider using the more effective overload accepting a character [performance-faster-string-find]
+    pos = buffer.find(":", pos);
+                      ^~~
+                      ':'
+/project/git/Source/kwsys/SystemInformation.cxx:3355:31: warning: 'find' called with a string literal consisting of a single character; consider using the more effective overload accepting a character [performance-faster-string-find]
+    size_t pos2 = buffer.find("\n", pos);
+                              ^~~~
+                              '\n'
+/project/git/Source/kwsys/SystemInformation.cxx:4605:43: warning: 'find' called with a string literal consisting of a single character; consider using the more effective overload accepting a character [performance-faster-string-find]
+    size_t pos2 = this->SysCtlBuffer.find("\n", pos);
+                                          ^~~~
+                                          '\n'
+/project/git/Source/kwsys/SystemInformation.cxx:5407:29: warning: 'find' called with a string literal consisting of a single character; consider using the more effective overload accepting a character [performance-faster-string-find]
+  while ((pos = output.find("\r", pos)) != std::string::npos) {
+                            ^~~~
+                            '\r'
+/project/git/Source/kwsys/SystemInformation.cxx:5413:29: warning: 'find' called with a string literal consisting of a single character; consider using the more effective overload accepting a character [performance-faster-string-find]
+  while ((pos = output.find("\n", pos)) != std::string::npos) {
+                            ^~~~
+                            '\n'
+/project/git/Utilities/cmjsoncpp/include/json/value.h:235:5: warning: move constructors should be marked noexcept [performance-noexcept-move-constructor]
+    CZString(CZString&& other);
+    ^
+/project/git/Utilities/cmjsoncpp/include/json/value.h:241:15: warning: move assignment operators should be marked noexcept [performance-noexcept-move-constructor]
+    CZString& operator=(CZString&& other);
+              ^
+/project/git/Utilities/cmjsoncpp/include/json/value.h:326:3: warning: move constructors should be marked noexcept [performance-noexcept-move-constructor]
+  Value(Value&& other);
+  ^
+/project/git/Utilities/cmjsoncpp/src/lib_json/json_reader.cpp:1998:53: warning: the parameter 'key' is copied for each invocation but only used as a const reference; consider making it a const reference [performance-unnecessary-value-param]
+Value& CharReaderBuilder::operator[](JSONCPP_STRING key)
+                                                    ^
+/project/git/Utilities/cmjsoncpp/include/json/value.h:235:5: warning: move constructors should be marked noexcept [performance-noexcept-move-constructor]
+    CZString(CZString&& other);
+    ^
+/project/git/Utilities/cmjsoncpp/include/json/value.h:241:15: warning: move assignment operators should be marked noexcept [performance-noexcept-move-constructor]
+    CZString& operator=(CZString&& other);
+              ^
+/project/git/Utilities/cmjsoncpp/include/json/value.h:326:3: warning: move constructors should be marked noexcept [performance-noexcept-move-constructor]
+  Value(Value&& other);
+  ^
+/project/git/Utilities/cmjsoncpp/src/lib_json/json_value.cpp:278:18: warning: move constructors should be marked noexcept [performance-noexcept-move-constructor]
+Value::CZString::CZString(CZString&& other)
+                 ^
+/project/git/Utilities/cmjsoncpp/src/lib_json/json_value.cpp:302:35: warning: move assignment operators should be marked noexcept [performance-noexcept-move-constructor]
+Value::CZString& Value::CZString::operator=(CZString&& other) {
+                                  ^
+/project/git/Utilities/cmjsoncpp/src/lib_json/json_value.cpp:490:8: warning: move constructors should be marked noexcept [performance-noexcept-move-constructor]
+Value::Value(Value&& other) {
+       ^
Index: clang-tidy/tool/test_input/out_csa_cmake.log
===================================================================
--- /dev/null
+++ clang-tidy/tool/test_input/out_csa_cmake.log
@@ -0,0 +1,408 @@
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:83:16: warning: Null pointer passed as an argument to a 'nonnull' parameter [clang-analyzer-core.NonNullParamChecker]
+               strcmp(valid_unused_args[cc], newArgv[cc]) != 0) {
+               ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:35:7: note: Assuming the condition is false
+  if (!arg.Parse()) {
+      ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:35:3: note: Taking false branch
+  if (!arg.Parse()) {
+  ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:39:7: note: Assuming 'n' is equal to 24
+  if (n != 24) {
+      ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:39:3: note: Taking false branch
+  if (n != 24) {
+  ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:43:7: note: Assuming 'm' is non-null
+  if (!m || strcmp(m, "test value") != 0) {
+      ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:43:7: note: Left side of '||' is false
+
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:43:3: note: Taking false branch
+  if (!m || strcmp(m, "test value") != 0) {
+  ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:47:3: note: Taking true branch
+  if (p != "1") {
+  ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:54:3: note: Taking true branch
+  if (m) {
+  ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:71:7: note: Assuming 'newArgc' is equal to 9
+  if (newArgc != 9) {
+      ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:71:3: note: Taking false branch
+  if (newArgc != 9) {
+  ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:75:3: note: Loop condition is true.  Entering loop body
+  for (cc = 0; cc < newArgc; ++cc) {
+  ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:79:5: note: Taking false branch
+    if (cc >= 9) {
+    ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:82:38: note: Left side of '&&' is false
+    } else if (valid_unused_args[cc] &&
+                                     ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:75:3: note: Loop condition is true.  Entering loop body
+  for (cc = 0; cc < newArgc; ++cc) {
+  ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:77:5: note: Calling 'operator<<<std::char_traits<char>>'
+    std::cout << "Unused argument[" << cc << "] = [" << newArgv[cc] << "]"
+    ^
+/usr/bin/../lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0/ostream:558:11: note: Assuming '__s' is null
+      if (!__s)
+          ^
+/usr/bin/../lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0/ostream:558:7: note: Taking true branch
+      if (!__s)
+      ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:77:5: note: Returning from 'operator<<<std::char_traits<char>>'
+    std::cout << "Unused argument[" << cc << "] = [" << newArgv[cc] << "]"
+    ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:79:5: note: Taking false branch
+    if (cc >= 9) {
+    ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:82:16: note: Left side of '&&' is true
+    } else if (valid_unused_args[cc] &&
+               ^
+/project/git/Source/kwsys/testCommandLineArguments1.cxx:83:16: note: Null pointer passed as an argument to a 'nonnull' parameter
+               strcmp(valid_unused_args[cc], newArgv[cc]) != 0) {
+               ^
+/project/git/Utilities/cmcurl/lib/urldata.h:1209:8: warning: Excessive padding in 'struct UrlState' (61 padding bytes, where 5 is optimal). 
+Optimal fields order: 
+conn_cache, 
+lastconnect, 
+headerbuff, 
+headersize, 
+buffer, 
+ulbuf, 
+current_speed, 
+first_host, 
+session, 
+sessionage, 
+scratch, 
+prev_signal, 
+resolver, 
+most_recent_ftp_entrypath, 
+crlf_conversions, 
+pathbuffer, 
+path, 
+range, 
+resume_from, 
+rtsp_next_client_CSeq, 
+rtsp_next_server_CSeq, 
+rtsp_CSeq_recv, 
+infilesize, 
+drain, 
+fread_func, 
+in, 
+stream_depends_on, 
+keeps_speed, 
+expiretime, 
+authhost, 
+authproxy, 
+timeoutlist, 
+timenode, 
+digest, 
+proxydigest, 
+tempwrite, 
+expires, 
+first_remote_port, 
+tempcount, 
+os_errno, 
+httpversion, 
+stream_weight, 
+multi_owned_by_easy, 
+this_is_a_follow, 
+refused_stream, 
+errorbuf, 
+allow_port, 
+authproblem, 
+ftp_trying_alternative, 
+wildcardmatch, 
+expect100header, 
+prev_block_had_trailing_cr, 
+slash_removed, 
+use_range, 
+rangestringalloc, 
+done, 
+stream_depends_e, 
+consider reordering the fields or adding explicit padding members [clang-analyzer-optin.performance.Padding]
+struct UrlState {
+       ^
+project/git/Utilities/cmcurl/lib/urldata.h:1457:8: warning: Excessive padding in 'struct UserDefined' (120 padding bytes, where 0 is optimal). 
+Optimal fields order: 
+err, 
+debugdata, 
+errorbuffer, 
+proxyport, 
+out, 
+in_set, 
+writeheader, 
+rtp_out, 
+use_port, 
+httpauth, 
+proxyauth, 
+socks5auth, 
+followlocation, 
+maxredirs, 
+postfields, 
+seek_func, 
+postfieldsize, 
+fwrite_func, 
+fwrite_header, 
+fwrite_rtp, 
+fread_func_set, 
+fprogress, 
+fxferinfo, 
+fdebug, 
+ioctl_func, 
+fsockopt, 
+sockopt_client, 
+fopensocket, 
+opensocket_client, 
+fclosesocket, 
+closesocket_client, 
+seek_client, 
+convfromnetwork, 
+convtonetwork, 
+convfromutf8, 
+progress_client, 
+ioctl_client, 
+timeout, 
+connecttimeout, 
+accepttimeout, 
+happy_eyeballs_timeout, 
+server_response_timeout, 
+tftp_blksize, 
+filesize, 
+low_speed_limit, 
+low_speed_time, 
+max_send_speed, 
+max_recv_speed, 
+set_resume_from, 
+headers, 
+proxyheaders, 
+httppost, 
+quote, 
+postquote, 
+prequote, 
+source_quote, 
+source_prequote, 
+source_postquote, 
+telnet_options, 
+resolve, 
+connect_to, 
+timevalue, 
+httpversion, 
+general_ssl, 
+dns_cache_timeout, 
+buffer_size, 
+upload_buffer_size, 
+private_data, 
+http200aliases, 
+ipver, 
+max_filesize, 
+ssh_keyfunc, 
+ssh_keyfunc_userp, 
+ssh_auth_types, 
+new_file_perms, 
+new_directory_perms, 
+allowed_protocols, 
+redir_protocols, 
+mail_rcpt, 
+rtspversion, 
+chunk_bgn, 
+chunk_end, 
+fnmatch, 
+fnmatch_data, 
+gssapi_delegation, 
+tcp_keepidle, 
+tcp_keepintvl, 
+maxconnects, 
+expect_100_timeout, 
+stream_depends_on, 
+stream_dependents, 
+resolver_start, 
+resolver_start_client, 
+ssl, 
+proxy_ssl, 
+mimepost, 
+str, 
+keep_post, 
+localportrange, 
+is_fread_set, 
+is_fwrite_set, 
+timecondition, 
+httpreq, 
+proxytype, 
+ftp_filemethod, 
+ftp_create_missing_dirs, 
+use_netrc, 
+use_ssl, 
+ftpsslauth, 
+ftp_ccc, 
+scope_id, 
+rtspreq, 
+stream_weight, 
+localport, 
+free_referer, 
+tftp_no_options, 
+sep_headers, 
+cookiesession, 
+crlf, 
+strip_path_slash, 
+ssh_compression, 
+get_filetime, 
+tunnel_thru_httpproxy, 
+prefer_ascii, 
+ftp_append, 
+ftp_list_only, 
+ftp_use_port, 
+hide_progress, 
+http_fail_on_error, 
+http_keep_sending_on_error, 
+http_follow_location, 
+http_transfer_encoding, 
+allow_auth_to_other_hosts, 
+include_header, 
+http_set_referer, 
+http_auto_referer, 
+opt_no_body, 
+upload, 
+2 warnings generated.
+
+verbose, 
+krb, 
+reuse_forbid, 
+reuse_fresh, 
+ftp_use_epsv, 
+ftp_use_eprt, 
+ftp_use_pret, 
+no_signal, 
+global_dns_cache, 
+tcp_nodelay, 
+ignorecl, 
+ftp_skip_ip, 
+connect_only, 
+http_te_skip, 
+http_ce_skip, 
+proxy_transfer_mode, 
+sasl_ir, 
+wildcard_enabled, 
+tcp_keepalive, 
+tcp_fastopen, 
+ssl_enable_npn, 
+ssl_enable_alpn, 
+path_as_is, 
+pipewait, 
+suppress_connect_headers, 
+dns_shuffle_addresses, 
+stream_depends_e, 
+haproxyprotocol, 
+abstract_unix_socket, 
+disallow_username_in_url, 
+consider reordering the fields or adding explicit padding members [clang-analyzer-optin.performance.Padding]
+struct UserDefined {
+       ^
+/project/git/Utilities/cmcurl/lib/urldata.h:1209:8: warning: Excessive padding in 'struct UrlState' (61 padding bytes, where 5 is optimal). 
+Optimal fields order: 
+conn_cache, 
+lastconnect, 
+headerbuff, 
+headersize, 
+buffer, 
+ulbuf, 
+current_speed, 
+first_host, 
+session, 
+sessionage, 
+scratch, 
+prev_signal, 
+resolver, 
+most_recent_ftp_entrypath, 
+crlf_conversions, 
+pathbuffer, 
+path, 
+range, 
+resume_from, 
+rtsp_next_client_CSeq, 
+rtsp_next_server_CSeq, 
+rtsp_CSeq_recv, 
+infilesize, 
+drain, 
+fread_func, 
+in, 
+stream_depends_on, 
+keeps_speed, 
+expiretime, 
+authhost, 
+authproxy, 
+timeoutlist, 
+timenode, 
+digest, 
+proxydigest, 
+tempwrite, 
+expires, 
+first_remote_port, 
+tempcount, 
+os_errno, 
+httpversion, 
+stream_weight, 
+multi_owned_by_easy, 
+this_is_a_follow, 
+refused_stream, 
+errorbuf, 
+allow_port, 
+authproblem, 
+ftp_trying_alternative, 
+wildcardmatch, 
+expect100header, 
+prev_block_had_trailing_cr, 
+slash_removed, 
+use_range, 
+rangestringalloc, 
+done, 
+stream_depends_e, 
+consider reordering the fields or adding explicit padding members [clang-analyzer-optin.performance.Padding]
+struct UrlState {
+       ^
+/project/git/Tests/CMakeLib/testUTF8.cxx:11:3: warning: 3rd function call argument is an uninitialized value [clang-analyzer-core.CallAndMessage]
+  printf("[0x%02X,0x%02X,0x%02X,0x%02X]", static_cast<int>(d[0]),
+  ^
+/project/git/Tests/CMakeLib/testUTF8.cxx:93:3: note: Loop condition is false. Execution continues on line 98
+  for (test_utf8_entry const* e = good_entry; e->n; ++e) {
+  ^
+/project/git/Tests/CMakeLib/testUTF8.cxx:98:3: note: Loop condition is true.  Entering loop body
+  for (test_utf8_char const* c = bad_chars; (*c)[0]; ++c) {
+  ^
+/project/git/Tests/CMakeLib/testUTF8.cxx:99:5: note: Taking false branch
+    if (!decode_bad(*c)) {
+    ^
+/project/git/Tests/CMakeLib/testUTF8.cxx:98:3: note: Loop condition is true.  Entering loop body
+  for (test_utf8_char const* c = bad_chars; (*c)[0]; ++c) {
+  ^
+/project/git/Tests/CMakeLib/testUTF8.cxx:99:10: note: Calling 'decode_bad'
+    if (!decode_bad(*c)) {
+         ^
+/project/git/Tests/CMakeLib/testUTF8.cxx:80:7: note: Assuming 'e' is null
+  if (e) {
+      ^
+/project/git/Tests/CMakeLib/testUTF8.cxx:80:3: note: Taking false branch
+  if (e) {
+  ^
+/project/git/Tests/CMakeLib/testUTF8.cxx:85:3: note: Calling 'report_bad'
+  report_bad(true, s);
+  ^
+/project/git/Tests/CMakeLib/testUTF8.cxx:46:32: note: '?' condition is true
+  printf("%s: decoding bad  ", passed ? "pass" : "FAIL");
+                               ^
+/project/git/Tests/CMakeLib/testUTF8.cxx:47:3: note: Calling 'test_utf8_char_print'
+  test_utf8_char_print(c);
+  ^
+/project/git/Tests/CMakeLib/testUTF8.cxx:11:3: note: 3rd function call argument is an uninitialized value
+  printf("[0x%02X,0x%02X,0x%02X,0x%02X]", static_cast<int>(d[0]),
+  ^
+/project/git/Source/cmServer.cxx:519:3: warning: Call to virtual function during construction [clang-analyzer-optin.cplusplus.VirtualCall]
+  AddNewConnection(connection);
+  ^
+/project/git/Source/cmServer.cxx:519:3: note: This constructor of an object of type 'cmServerBase' has not returned when the virtual method was called
+/project/git/Source/cmServer.cxx:519:3: note: Call to virtual function during construction
Index: clang-tidy/tool/run_clang_tidy.py
===================================================================
--- /dev/null
+++ clang-tidy/tool/run_clang_tidy.py
@@ -0,0 +1 @@
+run-clang-tidy.py
\ No newline at end of file
Index: clang-tidy/tool/run-clang-tidy.py
===================================================================
--- clang-tidy/tool/run-clang-tidy.py
+++ clang-tidy/tool/run-clang-tidy.py
@@ -38,6 +38,7 @@
 
 import argparse
 import glob
+import hashlib
 import json
 import multiprocessing
 import os
@@ -57,6 +58,179 @@
 else:
     import queue as queue
 
+
+class Diagnostic(object):
+    """
+    This class represents a parsed diagnostic message coming from clang-tidy
+    output. While parsing the raw output each new diagnostic will incrementally
+    build a temporary object of this class. Once the end of the diagnotic
+    message is found its content is hashed with SHA256 and stored in a set.
+    """
+
+    def __init__(self, path, line, column, diag):
+        """
+        Start initializing this object. The source location is always known
+        as it is emitted first and always in a single line.
+        `diag` will contain all warning/error/note information until the first
+        line-break. These are very uncommon but for example CSA's
+        PaddingChecker emits a multi-line warning containing the optimal
+        layout of a record. These additional lines must be added after
+        creation of the `Diagnostic`.
+        """
+        self._path = path
+        self._line = line
+        self._column = column
+        self._diag = diag
+        self._additional = ""
+
+    def add_additional_line(self, line):
+        """Store more additional information line per line while parsing."""
+        self._additional += "\n" + line
+
+    def get_fingerprint(self):
+        """Return a secure fingerprint (SHA256 hash) of the diagnostic."""
+        return hashlib.sha256(self.__str__().encode("utf-8", "backslachreplace")).hexdigest()
+
+    def __str__(self):
+        """Transform the object back into a raw diagnostic."""
+        return (self._path + ":" + str(self._line) + ":" + str(self._column)\
+                           + ": " + self._diag + self._additional).encode("ascii", "backslashreplace")
+
+
+class Deduplication(object):
+    """
+    This class provides an interface to deduplicate diagnostics emitted from
+    `clang-tidy`. It maintains a `set` of SHA 256 hashes of the diagnostics
+    and allows to query if an diagnostic is already emitted
+    (according to the corresponding hash of the diagnostic string!).
+    """
+
+    def __init__(self):
+        """Initializes an empty set."""
+        self._set = set()
+
+    def insert_and_query(self, diag):
+        """
+        This method returns True if the `diag` was *NOT* emitted already
+        signaling that the parser shall store/emit this diagnostic.
+        If the `diag` was stored already this method return False and has
+        no effect.
+        """
+        fp = diag.get_fingerprint()
+        if fp not in self._set:
+            self._set.add(fp)
+            return True
+        return False
+
+
+def _is_valid_diag_match(match_groups):
+    """Return true if all elements in `match_groups` are not None."""
+    return all(g is not None for g in match_groups)
+
+
+def _diag_from_match(match_groups):
+    """Helper function to create a diagnostic object from a regex match."""
+    return Diagnostic(
+        str(match_groups[0]), int(match_groups[1]), int(match_groups[2]),
+        str(match_groups[3]) + ": " + str(match_groups[4]))
+
+
+class ParseClangTidyDiagnostics(object):
+    """
+    This class is a stateful parser for `clang-tidy` diagnostic output.
+    The parser collects all unique diagnostics that can be emitted after
+    deduplication.
+    """
+
+    def __init__(self):
+        super(ParseClangTidyDiagnostics, self).__init__()
+        self._diag_re = re.compile(
+            r"^(.+):(\d+):(\d+): (error|warning): (.*)$")
+        self._current_diag = None
+
+        self._dedup = Deduplication()
+        self._uniq_diags = list()
+
+    def reset_parser(self):
+        """
+        Clean the parsing data to prepare for another set of output from
+        `clang-tidy`. The deduplication is not cleaned because that data
+        is required between multiple parsing runs. The diagnostics are cleaned
+        as the parser assumes the new unique diagnostics are consumed before
+        the parser is reset.
+        """
+        self._current_diag = None
+        self._uniq_diags = list()
+
+    def get_diags(self):
+        """
+        Returns a list of diagnostics that can be emitted after parsing the
+        full output of a `clang-tidy` invocation.
+        The list contains no duplicates.
+        """
+        return self._uniq_diags
+
+    def parse_string(self, input_str):
+        """Parse a string, e.g. captured stdout."""
+        if self._current_diag:
+            print("WARNING: FOUND CURRENT DIAG TO BE SET! BUG!!")
+            print("DIAGNOSTIC MESSAGE:")
+            print(str(self._current_diag))
+            print("SETTING _current_diag TO NONE")
+            self._current_diag = None
+        self._parse_lines(input_str.splitlines())
+
+    def _parse_line(self, line):
+        """Parses one line and returns nothing."""
+        match = self._diag_re.match(line)
+
+        # A new diagnostic is found (either error or warning).
+        if match and _is_valid_diag_match(match.groups()):
+            self._handle_new_diag(match.groups())
+
+        # There was no new diagnostic but a previous diagnostic is in flight.
+        # Interpret this situation as additional output like notes or
+        # code-pointers from the diagnostic that is in flight.
+        elif not match and self._current_diag:
+            self._current_diag.add_additional_line(line)
+
+        # There was no diagnostic in flight and this line did not create a
+        # new one. This situation should not occur, but might happen if
+        # `clang-tidy` emits information before warnings start.
+        else:
+            return
+
+    def _handle_new_diag(self, match_groups):
+        """Potentially store an in-flight diagnostic and create a new one."""
+        self._register_diag()
+        self._current_diag = _diag_from_match(match_groups)
+
+    def _register_diag(self):
+        """
+        Stores a potential in-flight diagnostic if it is a new unique message.
+        """
+        # The current in-flight diagnostic was not emitted before, therefor
+        # it should be stored as a new unique diagnostic.
+        if self._current_diag and \
+           self._dedup.insert_and_query(self._current_diag):
+            self._uniq_diags.append(self._current_diag)
+
+    def _parse_lines(self, line_list):
+        """Parse a list of lines without \\n at the end of each string."""
+        assert self._current_diag is None, \
+               "Parser not in a clean state to restart parsing"
+
+        for line in line_list:
+            self._parse_line(line.rstrip())
+        # Register the last diagnostic after all input is parsed.
+        self._register_diag()
+
+    def _parse_file(self, filename):
+        """Helper to parse a full file, for testing purposes only."""
+        with open(filename, "r") as input_file:
+            self._parse_lines(input_file.readlines())
+
+
 def find_compilation_database(path):
   """Adjusts the directory until a compilation database is found."""
   result = './'
@@ -153,7 +327,7 @@
   subprocess.call(invocation)
 
 
-def run_tidy(args, tmpdir, build_path, queue, lock, failed_files):
+def run_tidy(args, tmpdir, build_path, queue, lock, failed_files, parser):
   """Takes filenames out of queue and runs clang-tidy on them."""
   while True:
     name = queue.get()
@@ -166,10 +340,27 @@
     output, err = proc.communicate()
     if proc.returncode != 0:
       failed_files.append(name)
+
     with lock:
-      sys.stdout.write(' '.join(invocation) + '\n' + output.decode('utf-8') + '\n')
+      invoc = ' '.join(invocation) + '\n'
+      if parser:
+        parser.parse_string(output.decode('utf-8', 'backslashreplace'))
+        diags = [str(diag) for diag in parser.get_diags()]
+        diag_str = '\n'.join(diags)
+        sys.stdout.write(''.join([invoc, diag_str]).rstrip().encode('utf-8', 'backslashreplace'))
+        sys.stdout.write('\n')
+        parser.reset_parser()
+      else:
+        sys.stdout.write(invoc + output.decode('utf-8', 'backslashreplace').strip() + '\n')
+      sys.stdout.flush()
+
       if len(err) > 0:
-        sys.stderr.write(err.decode('utf-8') + '\n')
+        err_lines = err.splitlines()
+        errors = [l for l in err_lines if not "warnings generated" in l]
+        for l in errors:
+            sys.stderr.write(l.decode('utf-8', 'backslashreplace'))
+      sys.stderr.flush()
+
     queue.task_done()
 
 
@@ -224,6 +415,8 @@
                       'command line.')
   parser.add_argument('-quiet', action='store_true',
                       help='Run clang-tidy in quiet mode')
+  parser.add_argument('-deduplicate', action='store_true',
+                      help='Deduplicate diagnostic message from clang-tidy')
   args = parser.parse_args()
 
   db_path = 'compile_commands.json'
@@ -269,9 +462,12 @@
     # List of files with a non-zero return code.
     failed_files = []
     lock = threading.Lock()
+    parser = None
+    if args.deduplicate:
+        parser = ParseClangTidyDiagnostics()
     for _ in range(max_task):
       t = threading.Thread(target=run_tidy,
-                           args=(args, tmpdir, build_path, task_queue, lock, failed_files))
+                           args=(args, tmpdir, build_path, task_queue, lock, failed_files, parser))
       t.daemon = True
       t.start()
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to