jranieri-grammatech updated this revision to Diff 234971.
jranieri-grammatech added a comment.

Addressed review comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70689/new/

https://reviews.llvm.org/D70689

Files:
  clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp
  
clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif
  
clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif
  clang/test/Analysis/diagnostics/sarif-multi-diagnostic-test.c

Index: clang/test/Analysis/diagnostics/sarif-multi-diagnostic-test.c
===================================================================
--- clang/test/Analysis/diagnostics/sarif-multi-diagnostic-test.c
+++ clang/test/Analysis/diagnostics/sarif-multi-diagnostic-test.c
@@ -30,11 +30,17 @@
   return 0;
 }
 
+int unicode() {
+  int løçål = 0;
+  /* ☃ */ return 1 / løçål; // expected-warning {{Division by zero}}
+}
+
 int main(void) {
   f();
   g();
   h(0);
   leak(0);
+  unicode();
   return 0;
 }
 
Index: clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif
===================================================================
--- clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif
+++ clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif
@@ -4,7 +4,7 @@
     {
       "artifacts": [
         {
-          "length": 951,
+          "length": 1077,
           "location": {
           },
           "mimeType": "text/plain",
@@ -13,6 +13,7 @@
           ]
         }
       ],
+      "columnKind": "unicodeCodePoints",
       "results": [
         {
           "codeFlows": [
@@ -32,9 +33,9 @@
                           },
                           "region": {
                             "endColumn": 6,
-                            "endLine": 34,
+                            "endLine": 39,
                             "startColumn": 3,
-                            "startLine": 34
+                            "startLine": 39
                           }
                         }
                       }
@@ -102,9 +103,9 @@
                           },
                           "region": {
                             "endColumn": 6,
-                            "endLine": 35,
+                            "endLine": 40,
                             "startColumn": 3,
-                            "startLine": 35
+                            "startLine": 40
                           }
                         }
                       }
@@ -120,7 +121,7 @@
                             "index": 0,
                           },
                           "region": {
-                            "endColumn": 11,
+                            "endColumn": 12,
                             "endLine": 15,
                             "startColumn": 3,
                             "startLine": 15
@@ -363,6 +364,74 @@
           },
           "ruleId": "unix.Malloc",
           "ruleIndex": 3
+        },
+        {
+          "codeFlows": [
+            {
+              "threadFlows": [
+                {
+                  "locations": [
+                    {
+                      "importance": "essential",
+                      "location": {
+                        "message": {
+                          "text": "'løçål' initialized to 0"
+                        },
+                        "physicalLocation": {
+                          "artifactLocation": {
+                            "index": 0,
+                          },
+                          "region": {
+                            "endColumn": 12,
+                            "endLine": 34,
+                            "startColumn": 3,
+                            "startLine": 34
+                          }
+                        }
+                      }
+                    },
+                    {
+                      "importance": "essential",
+                      "location": {
+                        "message": {
+                          "text": "Division by zero"
+                        },
+                        "physicalLocation": {
+                          "artifactLocation": {
+                            "index": 0,
+                          },
+                          "region": {
+                            "endColumn": 20,
+                            "startColumn": 20,
+                            "startLine": 35
+                          }
+                        }
+                      }
+                    }
+                  ]
+                }
+              ]
+            }
+          ],
+          "locations": [
+            {
+              "physicalLocation": {
+                "artifactLocation": {
+                  "index": 0,
+                },
+                "region": {
+                  "endColumn": 20,
+                  "startColumn": 20,
+                  "startLine": 35
+                }
+              }
+            }
+          ],
+          "message": {
+            "text": "Division by zero"
+          },
+          "ruleId": "core.DivideZero",
+          "ruleIndex": 2
         }
       ],
       "tool": {
Index: clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif
===================================================================
--- clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif
+++ clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif
@@ -13,6 +13,7 @@
           ]
         }
       ],
+      "columnKind": "unicodeCodePoints",
       "results": [
         {
           "codeFlows": [
Index: clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp
+++ clang/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp
@@ -17,6 +17,7 @@
 #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/Path.h"
 
@@ -27,10 +28,12 @@
 namespace {
 class SarifDiagnostics : public PathDiagnosticConsumer {
   std::string OutputFile;
+  const LangOptions &LO;
 
 public:
-  SarifDiagnostics(AnalyzerOptions &, const std::string &Output)
-      : OutputFile(Output) {}
+  SarifDiagnostics(AnalyzerOptions &, const std::string &Output,
+                   const LangOptions &LO)
+      : OutputFile(Output), LO(LO) {}
   ~SarifDiagnostics() override = default;
 
   void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags,
@@ -45,9 +48,9 @@
 
 void ento::createSarifDiagnosticConsumer(
     AnalyzerOptions &AnalyzerOpts, PathDiagnosticConsumers &C,
-    const std::string &Output, const Preprocessor &,
+    const std::string &Output, const Preprocessor &PP,
     const cross_tu::CrossTranslationUnitContext &) {
-  C.push_back(new SarifDiagnostics(AnalyzerOpts, Output));
+  C.push_back(new SarifDiagnostics(AnalyzerOpts, Output, PP.getLangOpts()));
 }
 
 static StringRef getFileName(const FileEntry &FE) {
@@ -142,26 +145,55 @@
   return json::Object{{"uri", FileURI}, {"index", Index}};
 }
 
-static json::Object createTextRegion(SourceRange R, const SourceManager &SM) {
+static unsigned int adjustColumnPos(const SourceManager &SM, SourceLocation Loc,
+                                    unsigned int TokenLen = 0) {
+  assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
+
+  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedExpansionLoc(Loc);
+  assert(LocInfo.second > SM.getExpansionColumnNumber(Loc) &&
+         "position in file is before column number?");
+
+  const MemoryBuffer *Buf = SM.getBuffer(LocInfo.first);
+  assert(Buf && "got a NULL buffer for the location's file");
+  assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
+         "token extends past end of buffer?");
+
+  // Adjust the offset to be the start of the line, since we'll be counting
+  // Unicode characters from there until our column offset.
+  unsigned int Off = LocInfo.second - (SM.getExpansionColumnNumber(Loc) - 1);
+  unsigned int Ret = 1;
+  while (Off < (LocInfo.second + TokenLen)) {
+    Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
+    Ret++;
+  }
+
+  return Ret;
+}
+
+static json::Object createTextRegion(const LangOptions &LO, SourceRange R,
+                                     const SourceManager &SM) {
   json::Object Region{
       {"startLine", SM.getExpansionLineNumber(R.getBegin())},
-      {"startColumn", SM.getExpansionColumnNumber(R.getBegin())},
+      {"startColumn", adjustColumnPos(SM, R.getBegin())},
   };
   if (R.getBegin() == R.getEnd()) {
-    Region["endColumn"] = SM.getExpansionColumnNumber(R.getBegin());
+    Region["endColumn"] = adjustColumnPos(SM, R.getBegin());
   } else {
     Region["endLine"] = SM.getExpansionLineNumber(R.getEnd());
-    Region["endColumn"] = SM.getExpansionColumnNumber(R.getEnd()) + 1;
+    Region["endColumn"] = adjustColumnPos(
+        SM, R.getEnd(),
+        Lexer::MeasureTokenLength(R.getEnd(), SM, LO));
   }
   return Region;
 }
 
-static json::Object createPhysicalLocation(SourceRange R, const FileEntry &FE,
+static json::Object createPhysicalLocation(const LangOptions &LO,
+                                           SourceRange R, const FileEntry &FE,
                                            const SourceManager &SMgr,
                                            json::Array &Artifacts) {
   return json::Object{
       {{"artifactLocation", createArtifactLocation(FE, Artifacts)},
-       {"region", createTextRegion(R, SMgr)}}};
+       {"region", createTextRegion(LO, R, SMgr)}}};
 }
 
 enum class Importance { Important, Essential, Unimportant };
@@ -213,7 +245,8 @@
   return Importance::Unimportant;
 }
 
-static json::Object createThreadFlow(const PathPieces &Pieces,
+static json::Object createThreadFlow(const LangOptions &LO,
+                                     const PathPieces &Pieces,
                                      json::Array &Artifacts) {
   const SourceManager &SMgr = Pieces.front()->getLocation().getManager();
   json::Array Locations;
@@ -221,7 +254,7 @@
     const PathDiagnosticLocation &P = Piece->getLocation();
     Locations.push_back(createThreadFlowLocation(
         createLocation(createPhysicalLocation(
-                           P.asRange(),
+                           LO, P.asRange(),
                            *P.asLocation().getExpansionLoc().getFileEntry(),
                            SMgr, Artifacts),
                        Piece->getString()),
@@ -230,13 +263,15 @@
   return json::Object{{"locations", std::move(Locations)}};
 }
 
-static json::Object createCodeFlow(const PathPieces &Pieces,
+static json::Object createCodeFlow(const LangOptions &LO,
+                                   const PathPieces &Pieces,
                                    json::Array &Artifacts) {
   return json::Object{
-      {"threadFlows", json::Array{createThreadFlow(Pieces, Artifacts)}}};
+      {"threadFlows", json::Array{createThreadFlow(LO, Pieces, Artifacts)}}};
 }
 
-static json::Object createResult(const PathDiagnostic &Diag,
+static json::Object createResult(const LangOptions &LO,
+                                 const PathDiagnostic &Diag,
                                  json::Array &Artifacts,
                                  const StringMap<unsigned> &RuleMapping) {
   const PathPieces &Path = Diag.path.flatten(false);
@@ -247,10 +282,10 @@
 
   return json::Object{
       {"message", createMessage(Diag.getVerboseDescription())},
-      {"codeFlows", json::Array{createCodeFlow(Path, Artifacts)}},
+      {"codeFlows", json::Array{createCodeFlow(LO, Path, Artifacts)}},
       {"locations",
        json::Array{createLocation(createPhysicalLocation(
-           Diag.getLocation().asRange(),
+           LO, Diag.getLocation().asRange(),
            *Diag.getLocation().asLocation().getExpansionLoc().getFileEntry(),
            SMgr, Artifacts))}},
       {"ruleIndex", Iter->getValue()},
@@ -320,18 +355,20 @@
                               {"rules", createRules(Diags, RuleMapping)}}}};
 }
 
-static json::Object createRun(std::vector<const PathDiagnostic *> &Diags) {
+static json::Object createRun(const LangOptions &LO,
+                              std::vector<const PathDiagnostic *> &Diags) {
   json::Array Results, Artifacts;
   StringMap<unsigned> RuleMapping;
   json::Object Tool = createTool(Diags, RuleMapping);
   
   llvm::for_each(Diags, [&](const PathDiagnostic *D) {
-    Results.push_back(createResult(*D, Artifacts, RuleMapping));
+    Results.push_back(createResult(LO, *D, Artifacts, RuleMapping));
   });
 
   return json::Object{{"tool", std::move(Tool)},
                       {"results", std::move(Results)},
-                      {"artifacts", std::move(Artifacts)}};
+                      {"artifacts", std::move(Artifacts)},
+                      {"columnKind", "unicodeCodePoints"}};
 }
 
 void SarifDiagnostics::FlushDiagnosticsImpl(
@@ -351,6 +388,6 @@
       {"$schema",
        "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json"},
       {"version", "2.1.0"},
-      {"runs", json::Array{createRun(Diags)}}};
+      {"runs", json::Array{createRun(LO, Diags)}}};
   OS << llvm::formatv("{0:2}\n", json::Value(std::move(Sarif)));
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to