sammccall updated this revision to Diff 507339.
sammccall added a comment.

Use a process-shared counter for HTML output filenames to avoid clobbering.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146591/new/

https://reviews.llvm.org/D146591

Files:
  clang/include/clang/Analysis/FlowSensitive/Logger.h
  clang/lib/Analysis/FlowSensitive/CMakeLists.txt
  clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
  clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
  clang/lib/Analysis/FlowSensitive/HTMLLogger.css
  clang/lib/Analysis/FlowSensitive/HTMLLogger.js
  clang/lib/Analysis/FlowSensitive/bundle_resources.py
  clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp

Index: clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp
===================================================================
--- clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp
+++ clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp
@@ -9,6 +9,7 @@
 
 namespace clang::dataflow::test {
 namespace {
+using testing::HasSubstr;
 
 struct TestLattice {
   int Elements = 0;
@@ -83,19 +84,24 @@
   void logText(llvm::StringRef Text) override { OS << Text << "\n"; }
 };
 
-TEST(LoggerTest, Sequence) {
+AnalysisInputs<TestAnalysis> makeInputs() {
   const char *Code = R"cpp(
 int target(bool b, int p, int q) {
   return b ? p : q;    
 }
 )cpp";
+  static std::vector<std::string> Args = {
+      "-fsyntax-only", "-fno-delayed-template-parsing", "-std=c++17"};
 
   auto Inputs = AnalysisInputs<TestAnalysis>(
       Code, ast_matchers::hasName("target"),
       [](ASTContext &C, Environment &) { return TestAnalysis(C); });
-  std::vector<std::string> Args = {
-      "-fsyntax-only", "-fno-delayed-template-parsing", "-std=c++17"};
   Inputs.ASTBuildArgs = Args;
+  return Inputs;
+}
+
+TEST(LoggerTest, Sequence) {
+  auto Inputs = makeInputs();
   std::string Log;
   TestLogger Logger(Log);
   Inputs.BuiltinOptions.Log = &Logger;
@@ -148,5 +154,27 @@
 )");
 }
 
+TEST(LoggerTest, HTML) {
+  auto Inputs = makeInputs();
+  std::vector<std::string> Logs;
+  auto Logger = Logger::html([&]() {
+    Logs.emplace_back();
+    return std::make_unique<llvm::raw_string_ostream>(Logs.back());
+  });
+  Inputs.BuiltinOptions.Log = Logger.get();
+
+  ASSERT_THAT_ERROR(checkDataflow<TestAnalysis>(std::move(Inputs),
+                                                [](const AnalysisOutputs &) {}),
+                    llvm::Succeeded());
+
+  // Simple smoke tests: we can't meaningfully test the behavior.
+  ASSERT_THAT(Logs, testing::SizeIs(1));
+  EXPECT_THAT(Logs[0], HasSubstr("function updateSelection")) << "embeds JS";
+  EXPECT_THAT(Logs[0], HasSubstr("html {")) << "embeds CSS";
+  EXPECT_THAT(Logs[0], HasSubstr("b (ImplicitCastExpr")) << "has CFG elements";
+  EXPECT_THAT(Logs[0], HasSubstr("<template id='template-B3:1_B3.1'"))
+      << "has analysis point state";
+}
+
 } // namespace
 } // namespace clang::dataflow::test
Index: clang/lib/Analysis/FlowSensitive/bundle_resources.py
===================================================================
--- /dev/null
+++ clang/lib/Analysis/FlowSensitive/bundle_resources.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+# Simple bundler of files into string constants.
+#
+# Usage: bundle_resources.py foo.inc a.js path/b.css ...
+# Produces foo.inc containing:
+#   const char a_js[] = "...";
+#   const char b_css[] = "...";
+import os
+import sys
+
+outfile = sys.argv[1]
+infiles = sys.argv[2:]
+
+with open(outfile, 'w') as out:
+  for filename in infiles:
+    varname = os.path.basename(filename).replace('.', '_')
+    out.write("const char " + varname + "[] = \n");
+    # MSVC limits each chunk of string to 2k.
+    # Not quite enough for the JS file, so split by lines.
+    # The overall limit is 64k, which ought to be enough for anyone.
+    for line in open(filename).read().split('\n'):
+      out.write('  R"x(' + line + ')x" "\\n"\n' )
+    out.write('  ;\n');
Index: clang/lib/Analysis/FlowSensitive/HTMLLogger.js
===================================================================
--- /dev/null
+++ clang/lib/Analysis/FlowSensitive/HTMLLogger.js
@@ -0,0 +1,142 @@
+// Based on selected objects, hide/show sections & populate data from templates.
+//
+// For example, if the selection is {bb=BB4, elt=BB4.6, iter=BB4:2}:
+//   - show the "block" and "element" sections
+//   - load data from template "BB4"
+//     (this is the basic block stmt list etc)
+//   - load data from template "BB4:2_BB4.6"
+//     (this is the log from executing stmt 6 in the second iteration of BB4)
+//   - apply "bb-select" to items with class "BB4", etc
+let selection = {};
+function updateSelection(changes) {
+  for (var k in changes) {
+    if (selection[k] == changes[k])
+      delete changes[k]; // not actually changed, don't reload data below
+  }
+  Object.assign(selection, changes);
+
+  function showSection(name, visible) {
+    document.getElementById(name).hidden = !visible;
+  }
+  showSection("block", selection.bb);
+  showSection("element", selection.elt && selection.iter);
+
+  function updateData(keys) {
+    if (keys.some(k => !selection[k])) return; // no data to show
+    if (!keys.some(k => k in changes)) return; // nothing changed
+
+    let templateId = 'template-' + keys.map(k => selection[k]).join('_');
+    let template = document.getElementById(templateId);
+    if (template == null) {
+      console.error("missing template ", templateId);
+      return;
+    }
+    for (child of template.content.children) {
+      var slot = document.getElementById(child.id);
+      slot.replaceChildren(...child.cloneNode(/*deep=*/true).childNodes);
+    }
+  }
+
+  updateData(['bb']);
+  updateData(['iter', 'elt']);
+
+  for (var k in changes)
+    applyClassIf(k + '-select', classSelector(changes[k]));
+}
+
+// Handle a mouse event on a region containing selectable items.
+// This might end up changing the hover state or the selection state.
+//
+// targetSelector describes what target HTML element is selectable.
+// targetToID specifies how to determine the selection from it:
+//   hover: a function from target to the class name to highlight
+//   bb: a function from target to the basic-block name to select (BB4)
+//   elt: a function from target to the CFG element name to select (BB4.5)
+//   iter: a function from target to the BB iteration to select (BB4:2)
+// If an entry is missing, the selection is unmodified.
+// If an entry is null, the selection is always cleared.
+function mouseEventHandler(event, targetSelector, targetToID) {
+  var target = event.type == "mouseout" ? null : event.target.closest(targetSelector);
+  let selTarget = k => (target && targetToID[k]) ? targetToID[k](target) : null;
+  if (event.type == "click") {
+    let newSel = {};
+    for (var k in targetToID) {
+      if (k == 'hover') continue;
+      let t = selTarget(k);
+      newSel[k] = t;
+    }
+    updateSelection(newSel);
+  } else if ("hover" in targetToID) {
+    applyClassIf("hover", classSelector(selTarget("hover")));
+  }
+}
+function watch(rootSelector, targetSelector, targetToID) {
+  var root = document.querySelector(rootSelector);
+  console.log(root, rootSelector);
+  for (event of ['mouseout', 'mousemove', 'click'])
+    root.addEventListener(event, e => mouseEventHandler(e, targetSelector, targetToID));
+}
+function watchAll(lastIter) {
+  watch('#code', '.c', {
+    hover: e => e.dataset.elt,
+    bb: e => bbClass(e),
+    elt: e => e.dataset.elt,
+    // If we're already viewing an iteration of this BB, stick with the same.
+    iter: e => (selection.iter && selection.bb == bbClass(e)) ? selection.iter : lastIter[bbClass(e)],
+  });
+  watch('#cfg', '.bb', {
+    hover: e => e.id,
+    bb: e => e.id,
+    elt: e => e.id + ".0",
+    iter: e => lastIter[e.id],
+  });
+  watch('#timeline', '.entry', {
+    hover: e => [e.id, e.id.split(':')[0]],
+    bb: e => e.id.split(':')[0],
+    elt: e => e.id + ".0",
+    iter: e => e.id,
+  });
+  watch('#bb-elements', 'tr', {
+    hover: e => e.id,
+    elt: e => e.id,
+  });
+  watch('#iterations', '.chooser', {
+    hover: e => [...e.classList].find(x => x.startsWith('B')),
+    iter: e => [...e.classList].find(x => x.startsWith('B')),
+  });
+}
+function applyClassIf(cls, query) {
+  document.querySelectorAll('.' + cls).forEach(elt => elt.classList.remove(cls));
+  document.querySelectorAll(query).forEach(elt => elt.classList.add(cls));
+  console.log(cls, "=>", query);
+}
+function classSelector(cls) {
+  if (cls == null) return null;
+  if (Array.isArray(cls)) return cls.map(classSelector).join(', ');
+  var escaped = cls.replace('.', '\\.').replace(':', '\\:');
+  // don't require id="foo" class="foo"
+  return '.' + escaped + ", #" + escaped;
+}
+function bbClass(elt) {
+   return [...elt.classList].find(x=>x.startsWith('B') && x.indexOf('.')<0);
+}
+
+// Add a stylesheet defining colors for n basic blocks.
+function addBBColors(n) {
+  let sheet = new CSSStyleSheet();
+  // hex values to subtract from fff to get a base color
+  options = [0x001, 0x010, 0x100, 0x011, 0x101, 0x110, 0x111];
+  function color(hex) {
+    return "#" + hex.toString(16).padStart(3, "0");
+  }
+  function add(selector, property, hex) {
+    sheet.insertRule(`${selector} { ${property}: ${color(hex)}; }`)
+  }
+  for (var i = 0; i < n; ++i) {
+    let opt = options[i%options.length];
+    add(`.B${i}`, 'background-color', 0xfff - 2*opt);
+    add(`#B${i} polygon`, 'fill', 0xfff - 2*opt);
+    add(`#B${i} polygon`, 'stroke', 0x888 - 4*opt);
+  }
+  document.adoptedStyleSheets.push(sheet);
+}
Index: clang/lib/Analysis/FlowSensitive/HTMLLogger.css
===================================================================
--- /dev/null
+++ clang/lib/Analysis/FlowSensitive/HTMLLogger.css
@@ -0,0 +1,95 @@
+html { font-family: sans-serif; }
+body { margin: 0; display: flex; justify-content: left; }
+body > * { box-sizing: border-box; }
+body > section {
+  border: 1px solid black;
+  min-width: 20em;
+  overflow: auto;
+  max-height: 100vh;
+}
+section header {
+  background-color: #008;
+  color: white;
+  font-weight: bold;
+  font-size: large;
+  margin-bottom: 0.5em;
+}
+section h2 {
+  font-size: medium;
+  margin-bottom: 0.5em;
+  padding-top: 0.5em;
+  border-top: 1px solid #aaa;
+}
+#timeline {
+  min-width: 0;
+}
+#timeline .entry.hover {
+  background-color: #aaa;
+}
+#timeline .entry.iter-select {
+  background-color: #aac;
+}
+
+#bb-elements {
+  font-family: monospace;
+  font-size: x-small;
+  border-collapse: collapse;
+}
+#bb-elements td:nth-child(1) {
+  text-align: right;
+  width: 4em;
+  border-right: 1px solid #008;
+  padding: 0.3em 0.5em;
+
+  font-weight: bold;
+  color: #888;
+}
+#bb-elements tr.hover {
+  background-color: #abc;
+}
+#bb-elements tr.elt-select {
+  background-color: #acf;
+}
+#iterations {
+  display: flex;
+}
+#iterations .chooser {
+  flex-grow: 1;
+  text-align: center;
+}
+#iterations .chooser:not(.iter-select).hover {
+  background-color: #aaa;
+}
+#iterations .iter-select {
+  font-weight: bold;
+  background-color: #ccc;
+}
+#iterations .chooser:not(.iter-select) {
+  text-decoration: underline;
+  color: blue;
+}
+
+code.line {
+  display: block;
+  white-space: pre;
+  border-left: 3px solid white;
+}
+code.line:has(.bb-select) {
+  border-left: 3px solid black;
+}
+.c.hover, .bb.hover {
+  filter: saturate(200%) brightness(90%);
+}
+.c.elt-select {
+  box-shadow: inset 0 -4px 2px -2px #a00;
+}
+.bb.bb-select polygon {
+  stroke-width: 4px;
+  filter: brightness(70%) saturate(150%);
+}
+.bb { user-select: none; }
+.bb polygon { fill: white; }
+#cfg {
+  position: relative;
+  margin-left: 0.5em;
+}
Index: clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
===================================================================
--- /dev/null
+++ clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
@@ -0,0 +1,486 @@
+//===-- HTMLLogger.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HTML logger. Given a directory dir/, we write
+// dir/0.html for the first analysis, etc.
+// These files contain a visualization that allows inspecting the CFG and the
+// state of the analysis at each point.
+// Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded
+// so this file is self-contained.
+//
+// VIEWS
+//
+// The timeline and function view are always shown. These allow selecting basic
+// blocks, statements within them, and processing iterations (BBs are visited
+// multiple times when e.g. loops are involved).
+// These are written directly into the HTML body.
+//
+// There are also listings of particular basic blocks, and dumps of the state
+// at particular analysis points (i.e. BB2 iteration 3 statement 2).
+// These are only shown when the relevant BB/analysis point is *selected*.
+// These are defined inside <template> elements which are copied into the body
+// when needed by javascript.
+//
+// SELECTION
+//
+// This is the only real interactive mechanism.
+//
+// At any given time, there are several named selections, e.g.:
+//   bb: B2               (basic block 2 is selected)
+//   elt: B2.4            (statement 4 is selected)
+//   iter: B2:1           (iteration 1 of the basic block is selected)
+//   hover: B3            (hovering over basic block 3)
+//
+// The selection is updated by mouse events: hover by moving the mouse and
+// others by clicking. Elements that are click targets generally have attributes
+// (id or data-foo) that define what they should select.
+// See watchAll() in HTMLLogger.js for the exact logic.
+//
+// When the "bb" selection is set to "B2":
+//   - elements with class/id "B2" get class "bb-select"
+//   - contents of the <template> id "template-B2" get spliced into the page
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
+#include "clang/Analysis/FlowSensitive/DebugSupport.h"
+#include "clang/Analysis/FlowSensitive/Logger.h"
+#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
+#include "clang/Analysis/FlowSensitive/Value.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace clang::dataflow {
+namespace {
+
+void escape(char C, llvm::raw_ostream &OS) {
+  switch (C) {
+  case '<':
+    OS << "&lt;";
+    break;
+  case '&':
+    OS << "&amp;";
+    break;
+  case '"':
+    OS << "&quot;";
+    break;
+  case '\'':
+    OS << "&apos;";
+    break;
+  default:
+    OS << C;
+    break;
+  }
+}
+void escape(llvm::StringRef S, llvm::raw_ostream &OS) {
+  for (char C : S)
+    escape(C, OS);
+}
+// Render a graphviz graph specification to SVG using the `dot` tool.
+llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph);
+
+using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>;
+
+class HTMLLogger : public Logger {
+  StreamFactory Streams;
+  std::unique_ptr<llvm::raw_ostream> OS;
+
+  const ControlFlowContext *CFG;
+  // Timeline of iterations of CFG block visitation.
+  std::vector<std::pair<const CFGBlock *, unsigned>> Iters;
+  // Number of times each CFG block has been seen.
+  llvm::DenseMap<const CFGBlock *, unsigned> BlockIters;
+  // The messages logged in the current context but not yet written.
+  std::string ContextLogs;
+  // The number of elements we have visited within the current CFG block.
+  unsigned ElementIndex;
+
+public:
+  explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {}
+  void beginAnalysis(const ControlFlowContext &CFG,
+                     TypeErasedDataflowAnalysis &A) override {
+    // A logger may see several analyses (one stream each): forget the last one.
+    Iters.clear();
+    BlockIters.clear();
+    ContextLogs.clear();
+    OS = Streams();
+    this->CFG = &CFG;
+    extern const char HTMLLogger_js[], HTMLLogger_css[];
+    *OS << "<!doctype html>\n<html>\n<head>\n<style>" << HTMLLogger_css
+        << "</style>\n<script>" << HTMLLogger_js << "</script>\n";
+  }
+  // between beginAnalysis() and endAnalysis() we write all the dumps for
+  // particular analysis points into <template>s inside <head>.
+  void endAnalysis() override {
+    // Write the element list for each CFG block. These are in <template>s as
+    // they are only visible when the block is selected.
+    for (const auto& E: BlockIters)
+      writeBlock(*E.first, E.second);
+
+    *OS << "</head>\n";
+
+    *OS << "<body>\n";
+
+    // The timeline shows the sequence of analyses of basic blocks, and lets us
+    // choose a block+iteration to look at.
+    *OS << R"html(<section id="timeline"> <header>Timeline</header>)html";
+    for (const auto& It : Iters) {
+      *OS << llvm::formatv(
+                R"html(<div id="B{0}:{1}" class="entry">B{0} ({1})</div>)html",
+                It.first->getBlockID(), It.second)
+         << "\n";
+    }
+    *OS << "</section>\n";
+
+    // The function section shows the examined code both as source and as a
+    // CFG visualization, and lets us choose a block to look at.
+    *OS << "<section id='function'>\n";
+    *OS << "<header>Function</header>\n";
+    writeCode();
+    writeCFG();
+    *OS << "</section>\n";
+
+    // Placeholder for the block section, which shows BB details.
+    // These are populated from <template>s when a BB is selected.
+    *OS << "<section id='block'>\n";
+    *OS << "<header id='block-header'></header>\n";
+    *OS << "<div id='iterations'></div>\n";
+    *OS << "<table id='bb-elements'></table>\n";
+    *OS << "</section>\n";
+
+    // Placeholder for the element section, which shows a particular BB element
+    // and the analysis state associated with it. Populated from <template>s.
+    *OS << "<section id='element'></section>\n";
+
+    *OS << "<script>\n";
+    // BB colors are added programmatically instead of hardcoded in the CSS file
+    // to avoid boilerplate.
+    *OS << "addBBColors(" << CFG->getCFG().getNumBlockIDs() << ");\n";
+    // Hook up event handlers for selection.
+    // Needs to know how many iterations for each BB: clicking selects the last.
+    *OS << "watchAll(/*lastIters=*/{\n";
+    for (const auto& E : BlockIters)
+      *OS << llvm::formatv("  B{0}: 'B{0}:{1}',\n", E.first->getBlockID(),
+                          E.getSecond());
+    *OS << "});\n";
+    // Initialize selection, ready for incremental updates.
+    *OS << "updateSelection({});\n";
+    *OS << "</script>\n";
+
+    *OS << "</body>\n";
+    *OS << "</html>\n";
+  }
+
+  void enterBlock(const CFGBlock &B) override {
+    Iters.emplace_back(&B, ++BlockIters[&B]);
+    ElementIndex = 0;
+  }
+  void enterElement(const CFGElement &E) override {
+    ++ElementIndex;
+  }
+
+  // Write the analysis state associated with a particular analysis point.
+  // FIXME: this dump is fairly opaque. We should show:
+  //  - values associated with the current Stmt
+  //  - values associated with its children
+  //  - meaningful names for values
+  //  - which boolean values are implied true/false by the flow condition
+  void recordState(TypeErasedDataflowAnalysisState &State) override {
+    unsigned BlockID = Iters.back().first->getBlockID();
+    unsigned Iter = Iters.back().second;
+    *OS << llvm::formatv("<template id='template-B{0}:{1}_B{0}.{2}'>", BlockID,
+                        Iter, ElementIndex);
+    *OS << "<section id='element'>\n";
+    *OS << llvm::formatv(ElementIndex == 0
+                            ? "<header>B{0}({1}) initial state</header>\n"
+                            : "<header>Element B{0}({1}).{2}</header>\n",
+                        BlockID, Iter, ElementIndex);
+
+    // Emit (and then forget) the messages logged since the previous state.
+    if (!ContextLogs.empty()) {
+      *OS << "<h2>Log messages</h2>\n<pre>";
+      escape(ContextLogs, *OS);
+      ContextLogs.clear();
+      *OS << "</pre>\n";
+    }
+    *OS << "<h2>Built-in lattice</h2>\n<pre>";
+    std::string BuiltinLattice;
+    llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice);
+    State.Env.dump(BuiltinLatticeS);
+    escape(BuiltinLatticeS.str(), *OS); // str() flushes before we read.
+    *OS << "</pre>\n</section>\n</template>";
+  }
+  void blockConverged() override { logText("Block converged"); }
+
+  void logText(llvm::StringRef S) override {
+    ContextLogs.append(S.begin(), S.end());
+    ContextLogs.push_back('\n');
+  }
+
+private:
+  // Write the CFG block details.
+  // Currently this is just the list of elements in execution order.
+  // FIXME: an AST dump would be a useful view, too.
+  void writeBlock(const CFGBlock &B, unsigned Iters) {
+    *OS << "<template id='template-B" << B.getBlockID() << "'>\n";
+
+    *OS << "<header id='block-header'>Block B" << B.getBlockID()
+       << "</header>\n";
+
+    *OS << "<div id='iterations'>\n";
+    for (unsigned I = 1; I <= Iters; ++I)
+      *OS << llvm::formatv("<a class='chooser B{0}:{1}'>Iteration {1}</a>\n",
+                          B.getBlockID(), I);
+    *OS << "</div>\n";
+
+    *OS << "<table id='bb-elements'>\n";
+    *OS << llvm::formatv("<tr id='B{0}.0'><td class='B{0}'>B{0}.0</td>"
+                        "<td><i>(initial state)</i></td></tr>\n",
+                        B.getBlockID());
+    unsigned Index = 0;
+    for (const auto &Elt : B.Elements) {
+      std::string Dump;
+      llvm::raw_string_ostream DumpS(Dump);
+      Elt.dumpToStream(DumpS);
+
+      *OS << llvm::formatv(R"html(<tr id="B{0}.{1}">
+          <td class='B{0}'>B{0}.{1}</td><td>)html",
+                           B.getBlockID(), ++Index);
+      escape(Dump, *OS);
+      *OS << "</td></tr>\n";
+    }
+    *OS << "</table>";
+
+    *OS << "</template>";
+  }
+
+  // Write the code of function being examined.
+  // We want to overlay the code with <span>s that mark which BB particular
+  // tokens are associated with, and even which BB element (so that clicking
+  // can select the right element).
+  void writeCode() {
+    if (!CFG->getDecl())
+      return;
+    const auto &AST = CFG->getDecl()->getASTContext();
+    bool Invalid = false;
+
+    // Extract the source code from the original file.
+    // Pretty-printing from the AST would probably be nicer (no macros or
+    // indentation to worry about), but we need the boundaries of particular
+    // AST nodes and the printer doesn't provide this.
+    auto Range = clang::Lexer::makeFileCharRange(
+        CharSourceRange::getTokenRange(CFG->getDecl()->getSourceRange()),
+        AST.getSourceManager(), AST.getLangOpts());
+    if (Range.isInvalid())
+      return;
+    llvm::StringRef Code = clang::Lexer::getSourceText(
+        Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid);
+    if (Invalid)
+      return;
+
+    static constexpr unsigned Missing = -1;
+    // TokenInfo stores the BB and set of elements that a token is part of.
+    struct TokenInfo {
+      // The basic block this is part of.
+      // This is the BB of the stmt with the smallest containing range.
+      unsigned BB = Missing;
+      unsigned BBPriority = 0;
+      // The most specific stmt this is part of (smallest range).
+      unsigned Elt = Missing;
+      unsigned EltPriority = 0;
+      // All stmts this is part of.
+      SmallVector<unsigned> Elts;
+
+      // Mark this token as being part of BB.Elt.
+      // RangeLen is the character length of the element's range: a smaller
+      // range means an inner (more specific) statement.
+      // For example in `a==0`, token "a" is part of the stmts "a" and "a==0".
+      // "a" has the smaller range, so clicking on it should select stmt "a".
+      void assign(unsigned BB, unsigned Elt, unsigned RangeLen) {
+        // A worse BB (larger range) => ignore.
+        if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen)
+          return;
+        if (BB != this->BB) {
+          this->BB = BB;
+          Elts.clear();
+          BBPriority = RangeLen;
+        }
+        BBPriority = std::min(BBPriority, RangeLen);
+        Elts.push_back(Elt);
+        if (this->Elt == Missing || EltPriority > RangeLen) {
+          this->Elt = Elt;
+          EltPriority = RangeLen; // Remember how specific the chosen stmt is.
+        }
+      }
+      bool operator==(const TokenInfo &Other) const {
+        return std::tie(BB, Elt, Elts) ==
+               std::tie(Other.BB, Other.Elt, Other.Elts);
+      }
+      // Write the attributes for the <span> on this token.
+      void write(llvm::raw_ostream &OS) const {
+        OS << "class='c";
+        if (BB != Missing)
+          OS << " B" << BB;
+        for (unsigned Elt : Elts)
+          OS << " B" << BB << "." << Elt;
+        OS << "'";
+
+        if (Elt != Missing)
+          OS << " data-elt='B" << BB << "." << Elt << "'";
+      }
+    };
+
+    // Construct one TokenInfo per character in a flat array.
+    // This is inefficient (chars in a token all have the same info) but simple.
+    std::vector<TokenInfo> State(Code.size());
+    for (const auto *Block : CFG->getCFG()) {
+      unsigned EltIndex = 0;
+      for (const auto& Elt : *Block) {
+        ++EltIndex;
+        if (const auto S = Elt.getAs<CFGStmt>()) {
+          auto EltRange = clang::Lexer::makeFileCharRange(
+              CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()),
+              AST.getSourceManager(), AST.getLangOpts());
+          if (EltRange.isInvalid())
+            continue;
+          if (EltRange.getBegin() < Range.getBegin() ||
+              EltRange.getBegin() >= Range.getEnd() ||
+              EltRange.getEnd() < Range.getBegin() ||
+              EltRange.getEnd() >= Range.getEnd())
+            continue; // Not wholly inside the function's text.
+          unsigned Off = EltRange.getBegin().getRawEncoding() -
+                         Range.getBegin().getRawEncoding();
+          unsigned Len = EltRange.getEnd().getRawEncoding() -
+                         EltRange.getBegin().getRawEncoding();
+          for (unsigned I = 0; I < Len; ++I)
+            State[Off + I].assign(Block->getBlockID(), EltIndex, Len);
+        }
+      }
+    }
+
+    // Finally, write the code with the correct <span>s.
+    *OS << "<div id='code'>\n<code class='line'>";
+    for (unsigned I = 0; I < Code.size(); ++I) {
+      // Only break <span>s where the TokenInfo changes, not at every character.
+      bool NeedOpen = I == 0 || !(State[I] == State[I-1]);
+      bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]);
+      if (NeedOpen) {
+        *OS << "<span ";
+        State[I].write(*OS);
+        *OS << ">";
+      }
+      if (Code[I] == '\n')
+        *OS << "</code>\n<code class='line'>";
+      else
+        escape(Code[I], *OS);
+      if (NeedClose) *OS << "</span>";
+    }
+    *OS << "</code>\n";
+    *OS << "</div>";
+  }
+
+  // Write the CFG diagram, a graph of basic blocks.
+  // Laying out graphs is hard, so we construct a graphviz description and shell
+  // out to `dot` to turn it into an SVG.
+  void writeCFG() {
+    *OS << "<div id='cfg'>\n";
+    if (auto SVG = renderSVG(buildCFGDot(CFG->getCFG())))
+      *OS << *SVG;
+    else
+      *OS << "Can't draw CFG: " << toString(SVG.takeError());
+    *OS << "</div>\n";
+  }
+
+  // Produce a graphviz description of a CFG.
+  static std::string buildCFGDot(const clang::CFG &CFG) {
+    std::string Graph;
+    llvm::raw_string_ostream GraphS(Graph);
+    // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses.
+    GraphS << R"(digraph {
+      tooltip=" "
+      node[class=bb, shape=square, fontname="sans-serif", tooltip=" "]
+      edge[tooltip = " "]
+)";
+    for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I)
+      GraphS << "  B" << I << " [id=B" << I << "]\n";
+    for (const auto *Block : CFG) {
+      for (const auto &Succ : Block->succs())
+        if (const auto *Next = Succ.getReachableBlock()) // may be null
+          GraphS << "  B" << Block->getBlockID() << " -> B"
+                 << Next->getBlockID() << "\n";
+    }
+    GraphS << "}\n";
+    return Graph;
+  }
+};
+
+// Nothing interesting here, just subprocess/temp-file plumbing.
+llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) {
+  auto Dot = llvm::sys::findProgramByName("dot");
+  if (!Dot)
+    return llvm::createStringError(Dot.getError(),
+                                   "Can't draw CFG: 'dot' not found on PATH");
+
+  // Create input and output files for `dot` subprocess.
+  // (We create the output file as empty, to reserve the temp filename).
+  llvm::SmallString<256> Input, Output;
+  int InputFD;
+  if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD,
+                                                   Input))
+    return llvm::createStringError(EC, "failed to create `dot` temp input");
+  llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph;
+  auto DeleteInput =
+      llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); });
+  if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output))
+    return llvm::createStringError(EC, "failed to create `dot` temp output");
+  auto DeleteOutput =
+      llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); });
+
+  std::vector<std::optional<llvm::StringRef>> Redirects = {
+      Input, Output,
+      /*stderr=*/std::nullopt};
+  std::string ErrMsg;
+  int Code = llvm::sys::ExecuteAndWait(
+      *Dot, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects,
+      /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg);
+  if (!ErrMsg.empty())
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "'dot' failed: " + ErrMsg);
+  if (Code != 0)
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "'dot' failed (" + llvm::Twine(Code) + ")");
+
+  auto Buf = llvm::MemoryBuffer::getFile(Output);
+  if (!Buf)
+    return llvm::createStringError(Buf.getError(), "Can't read `dot` output");
+
+  // Output has <?xml> prefix we don't want. Skip to <svg> tag.
+  llvm::StringRef Result = Buf.get()->getBuffer();
+  auto Pos = Result.find("<svg");
+  if (Pos == llvm::StringRef::npos)
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "Can't find <svg> tag in `dot` output");
+  return Result.substr(Pos).str();
+}
+
+#include "HTMLLogger.inc" // defines HTMLLogger_{js,css}.
+
+} // namespace
+
+std::unique_ptr<Logger>
+Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) {
+  return std::make_unique<HTMLLogger>(std::move(Streams));
+}
+
+} // namespace clang::dataflow
Index: clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
===================================================================
--- clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
+++ clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
@@ -20,14 +20,17 @@
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
 #include <cassert>
 #include <memory>
 #include <utility>
 
-static llvm::cl::opt<std::string>
-    DataflowLog("dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
-                llvm::cl::desc("Emit log of dataflow analysis. With no arg, "
-                               "writes textual log to stderr."));
+static llvm::cl::opt<std::string> DataflowLog(
+    "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
+    llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
+                   "log to stderr. With an arg, writes HTML logs under the "
+                   "specified directory (one per analyzed function)."));
 
 namespace clang {
 namespace dataflow {
@@ -381,6 +384,34 @@
   return nullptr;
 }
 
+std::unique_ptr<Logger> flagLogger() {
+  if (DataflowLog.empty())
+    return Logger::textual(llvm::errs());
+
+  llvm::StringRef Dir = DataflowLog;
+  if (auto EC = llvm::sys::fs::create_directories(Dir))
+    llvm::errs() << "Failed to create log dir: " << EC.message() << "\n";
+  // Separate analyses will create loggers writing to the same directory.
+  // Share a counter so they don't all overwrite each other's 0.html.
+  // (Don't share a logger, it's not threadsafe).
+  static std::atomic<unsigned> Counter = {0};
+  auto StreamFactory =
+      [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> {
+    llvm::SmallString<256> File(Dir);
+    llvm::sys::path::append(File,
+                            std::to_string(Counter.fetch_add(1)) + ".html");
+    std::error_code EC;
+    auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC);
+    if (EC) {
+      llvm::errs() << "Failed to create log " << File << ": " << EC.message()
+                   << "\n";
+      return std::make_unique<llvm::raw_null_ostream>();
+    }
+    return OS;
+  };
+  return Logger::html(std::move(StreamFactory));
+}
+
 DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S,
                                                  Options Opts)
     : S(std::move(S)), TrueVal(createAtomicBoolValue()),
@@ -390,9 +421,8 @@
   // This is ugly but provides a uniform method for ad-hoc debugging dataflow-
   // based tools.
   if (Opts.Log == nullptr && DataflowLog.getNumOccurrences()) {
-    LogOwner = Logger::textual(llvm::errs());
+    LogOwner = flagLogger();
     this->Opts.Log = LogOwner.get();
-    // FIXME: if the flag is given a value, write an HTML log to a file.
   }
 }
 
Index: clang/lib/Analysis/FlowSensitive/CMakeLists.txt
===================================================================
--- clang/lib/Analysis/FlowSensitive/CMakeLists.txt
+++ clang/lib/Analysis/FlowSensitive/CMakeLists.txt
@@ -2,6 +2,7 @@
   ControlFlowContext.cpp
   DataflowAnalysisContext.cpp
   DataflowEnvironment.cpp
+  HTMLLogger.cpp
   Logger.cpp
   Transfer.cpp
   TypeErasedDataflowAnalysis.cpp
@@ -16,3 +17,14 @@
   )
 
 add_subdirectory(Models)
+
+add_custom_command(OUTPUT HTMLLogger.inc
+  COMMAND "${Python3_EXECUTABLE}" bundle_resources.py 
+  ${CMAKE_CURRENT_BINARY_DIR}/HTMLLogger.inc
+  HTMLLogger.css HTMLLogger.js
+  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+  COMMENT "Bundling HTMLLogger resources"
+  DEPENDS bundle_resources.py HTMLLogger.css HTMLLogger.js
+  VERBATIM)
+add_custom_target(clangAnalysisFlowSensitiveResources DEPENDS HTMLLogger.inc)
+add_dependencies(clangAnalysisFlowSensitive clangAnalysisFlowSensitiveResources)
\ No newline at end of file
Index: clang/include/clang/Analysis/FlowSensitive/Logger.h
===================================================================
--- clang/include/clang/Analysis/FlowSensitive/Logger.h
+++ clang/include/clang/Analysis/FlowSensitive/Logger.h
@@ -31,6 +31,13 @@
   /// A logger that simply writes messages to the specified ostream in real
   /// time.
   static std::unique_ptr<Logger> textual(llvm::raw_ostream &);
+  // A logger that builds an HTML UI to inspect the analysis results.
+  // One file is written under the specified dir per analyzed function.
+  static std::unique_ptr<Logger> html(llvm::StringRef Dir);
+  // A logger that builds an HTML UI to inspect the analysis results.
+  // Each function's analysis is written to a stream obtained from the factory.
+  static std::unique_ptr<Logger>
+      html(std::function<std::unique_ptr<llvm::raw_ostream>()>);
 
   virtual ~Logger() = default;
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to