sammccall updated this revision to Diff 508517.
sammccall marked 4 inline comments as done.
sammccall edited the summary of this revision.
sammccall added a comment.

Address Dmitri's comments
Update demo link


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D146591/new/

https://reviews.llvm.org/D146591

Files:
  clang/include/clang/Analysis/FlowSensitive/Logger.h
  clang/lib/Analysis/FlowSensitive/CMakeLists.txt
  clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
  clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
  clang/lib/Analysis/FlowSensitive/HTMLLogger.css
  clang/lib/Analysis/FlowSensitive/HTMLLogger.html
  clang/lib/Analysis/FlowSensitive/HTMLLogger.js
  clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp
  clang/utils/bundle_resources.py

Index: clang/utils/bundle_resources.py
===================================================================
--- /dev/null
+++ clang/utils/bundle_resources.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+
+#===- bundle_resources.py - Generate string constants with file contents. ===
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===
+
+# Usage: bundle_resources.py foo.inc a.js path/b.css ...
+# Produces foo.inc containing:
+#   const char a_js[] = "...";
+#   const char b_css[] = "...";
+import os
+import sys
+
+outfile = sys.argv[1]
+infiles = sys.argv[2:]
+
+with open(outfile, 'w') as out:
+  for filename in infiles:
+    varname = os.path.basename(filename).replace('.', '_')
+    out.write("const char " + varname + "[] = \n");
+    # MSVC limits each chunk of string to 2k, so split by lines.
+    # The overall limit is 64k, which ought to be enough for anyone.
+    for line in open(filename).read().split('\n'):
+      out.write('  R"x(' + line + ')x" "\\n"\n' )
+    out.write('  ;\n');
Index: clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp
===================================================================
--- clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp
+++ clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp
@@ -9,6 +9,7 @@
 
 namespace clang::dataflow::test {
 namespace {
+using testing::HasSubstr;
 
 struct TestLattice {
   int Elements = 0;
@@ -83,19 +84,24 @@
   void logText(llvm::StringRef Text) override { OS << Text << "\n"; }
 };
 
-TEST(LoggerTest, Sequence) {
+AnalysisInputs<TestAnalysis> makeInputs() {
   const char *Code = R"cpp(
 int target(bool b, int p, int q) {
   return b ? p : q;    
 }
 )cpp";
+  static const std::vector<std::string> Args = {
+      "-fsyntax-only", "-fno-delayed-template-parsing", "-std=c++17"};
 
   auto Inputs = AnalysisInputs<TestAnalysis>(
       Code, ast_matchers::hasName("target"),
       [](ASTContext &C, Environment &) { return TestAnalysis(C); });
-  std::vector<std::string> Args = {
-      "-fsyntax-only", "-fno-delayed-template-parsing", "-std=c++17"};
   Inputs.ASTBuildArgs = Args;
+  return Inputs;
+}
+
+TEST(LoggerTest, Sequence) {
+  auto Inputs = makeInputs();
   std::string Log;
   TestLogger Logger(Log);
   Inputs.BuiltinOptions.Log = &Logger;
@@ -148,5 +154,29 @@
 )");
 }
 
+TEST(LoggerTest, HTML) {
+  auto Inputs = makeInputs();
+  std::vector<std::string> Logs;
+  auto Logger = Logger::html([&]() {
+    Logs.emplace_back();
+    return std::make_unique<llvm::raw_string_ostream>(Logs.back());
+  });
+  Inputs.BuiltinOptions.Log = Logger.get();
+
+  ASSERT_THAT_ERROR(checkDataflow<TestAnalysis>(std::move(Inputs),
+                                                [](const AnalysisOutputs &) {}),
+                    llvm::Succeeded());
+
+  // Simple smoke tests: we can't meaningfully test the behavior.
+  ASSERT_THAT(Logs, testing::SizeIs(1));
+  EXPECT_THAT(Logs[0], HasSubstr("function updateSelection")) << "embeds JS";
+  EXPECT_THAT(Logs[0], HasSubstr("html {")) << "embeds CSS";
+  EXPECT_THAT(Logs[0], HasSubstr("b (ImplicitCastExpr")) << "has CFG elements";
+  EXPECT_THAT(Logs[0], HasSubstr("\"B3:1_B3.1\":"))
+      << "has analysis point state";
+  EXPECT_THAT(Logs[0], HasSubstr("transferBranch(0)")) << "has analysis logs";
+  EXPECT_THAT(Logs[0], HasSubstr("LocToVal")) << "has built-in lattice dump";
+}
+
 } // namespace
 } // namespace clang::dataflow::test
Index: clang/lib/Analysis/FlowSensitive/HTMLLogger.js
===================================================================
--- /dev/null
+++ clang/lib/Analysis/FlowSensitive/HTMLLogger.js
@@ -0,0 +1,210 @@
+//===-- HTMLLogger.js -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Based on selected objects, hide/show sections & populate data from templates.
+//
+// For example, if the selection is {bb=BB4, elt=BB4.6, iter=BB4:2}:
+//   - show the "block" and "element" sections
+//   - re-render templates within these sections (if selection changed)
+//   - apply "bb-select" to items with class "BB4", etc
+let selection = {};
+function updateSelection(changes, data) {
+  Object.assign(selection, changes);
+
+  data = Object.create(data);
+  data.selection = selection;
+  for (root of document.querySelectorAll('[data-selection]'))
+    updateSection(root, data);
+
+  for (var k in changes)
+    applyClassIf(k + '-select', classSelector(changes[k]));
+}
+
+// Given <section data-selection="x,y">:
+//  - hide section if selections x or y are null
+//  - re-render templates if x or y have changed
+function updateSection(root, data) {
+  let changed = root.selection == null;
+  root.selection ||= {};
+  for (key of root.dataset.selection.split(',')) {
+    if (!key) continue;
+    if (data.selection[key] != root.selection[key]) {
+      root.selection[key] = data.selection[key];
+      changed = true;
+    }
+    if (data.selection[key] == null) {
+      root.hidden = true;
+      return;
+    }
+  }
+  root.hidden = false;
+  if (changed) {
+    for (tmpl of root.getElementsByTagName('template')) {
+      // Clear previously rendered template contents.
+      while (tmpl.nextSibling && tmpl.nextSibling.inflated)
+        tmpl.parentNode.removeChild(tmpl.nextSibling);
+      inflate(tmpl, data, tmpl.parentNode, tmpl.nextSibling);
+    }
+  }
+}
+
+// Expands template `tmpl` based on input `data`:
+//  - interpolates {{expressions}} in text and attributes
+//  - <template> tags can modify expansion: if, for etc
+// Outputs to `parent` element, inserting before `next`.
+function inflate(tmpl, data, parent, next) {
+  // We use eval() as our expression language in templates!
+  // The templates are static and trusted.
+  let evalExpr = (expr, data) => eval('with (data) { ' + expr + ' }');
+  let interpolate = (str, data) =>
+      str.replace(/\{\{(.*?)\}\}/g, (_, expr) => evalExpr(expr, data))
+  // Anything other than <template> tag: copy, interpolate, recursively inflate.
+  if (tmpl.nodeName != 'TEMPLATE') {
+    let clone = tmpl.cloneNode();
+    clone.inflated = true;
+    if (clone instanceof Text)
+      clone.textContent = interpolate(clone.textContent, data);
+    if (clone instanceof Element) {
+      for (attr of clone.attributes)
+        attr.value = interpolate(attr.value, data);
+      for (c of tmpl.childNodes)
+        inflate(c, data, clone, /*next=*/null);
+    }
+    return parent.insertBefore(clone, next);
+  }
+  // <template> tag handling. Base case: recursively inflate.
+  function handle(data) {
+    for (c of tmpl.content.childNodes)
+      inflate(c, data, parent, next);
+  }
+  // Directives on <template> tags modify behavior.
+  const directives = {
+    // data-for="x in expr": expr is enumerable, bind x to each in turn
+    'for': (nameInExpr, data, proceed) => {
+      let [name, expr] = nameInExpr.split(' in ');
+      let newData = Object.create(data);
+      let index = 0;
+      for (val of evalExpr(expr, data) || []) {
+        newData[name] = val;
+        newData[name + '_index'] = index++;
+        proceed(newData);
+      }
+    },
+    // data-if="expr": only include contents if expression is truthy
+    'if': (expr, data, proceed) => { if (evalExpr(expr, data)) proceed(data); },
+    // data-let="x = expr": bind x to value of expr
+    'let': (nameEqExpr, data, proceed) => {
+      let [name, expr] = nameEqExpr.split(' = ');
+      let newData = Object.create(data);
+      newData[name] = evalExpr(expr, data);
+      proceed(newData);
+    },
+  }
+  // Compose directive handlers on top of the base handler.
+  for (let [dir, value] of Object.entries(tmpl.dataset).reverse()) {
+    if (dir in directives) {
+      let proceed = handle;
+      handle = (data) => directives[dir](value, data, proceed);
+    }
+  }
+  handle(data);
+}
+
+// Handle a mouse event on a region containing selectable items.
+// This might end up changing the hover state or the selection state.
+//
+// targetSelector describes what target HTML element is selectable.
+// targetToID specifies how to determine the selection from it:
+//   hover: a function from target to the class name to highlight
+//   bb: a function from target to the basic-block name to select (BB4)
+//   elt: a function from target to the CFG element name to select (BB4.5)
+//   iter: a function from target to the BB iteration to select (BB4:2)
+// If an entry is missing, the selection is unmodified.
+// If an entry is null, the selection is always cleared.
+function mouseEventHandler(event, targetSelector, targetToID, data) {
+  var target = event.type == "mouseout" ? null : event.target.closest(targetSelector);
+  let selTarget = k => (target && targetToID[k]) ? targetToID[k](target) : null;
+  if (event.type == "click") {
+    let newSel = {};
+    for (var k in targetToID) {
+      if (k == 'hover') continue;
+      let t = selTarget(k);
+      newSel[k] = t;
+    }
+    updateSelection(newSel, data);
+  } else if ("hover" in targetToID) {
+    applyClassIf("hover", classSelector(selTarget("hover")));
+  }
+}
+function watch(rootSelector, targetSelector, targetToID, data) {
+  var root = document.querySelector(rootSelector);
+  for (event of ['mouseout', 'mousemove', 'click'])
+    root.addEventListener(event, e => mouseEventHandler(e, targetSelector, targetToID, data));
+}
+function watchSelection(data) {
+  let lastIter = (bb) => `${bb}:${data.cfg[bb].iters}`;
+  watch('#code', '.c', {
+    hover: e => e.dataset.elt,
+    bb: e => e.dataset.bb,
+    elt: e => e.dataset.elt,
+    // If we're already viewing an iteration of this BB, stick with the same.
+    iter: e => (selection.iter && selection.bb == e.dataset.bb) ? selection.iter : lastIter(e.dataset.bb),
+  }, data);
+  watch('#cfg', '.bb', {
+    hover: e => e.id,
+    bb: e => e.id,
+    elt: e => e.id + ".0",
+    iter: e => lastIter(e.id),
+  }, data);
+  watch('#timeline', '.entry', {
+    hover: e => [e.id, e.dataset.bb],
+    bb: e => e.dataset.bb,
+    elt: e => e.dataset.bb + ".0",
+    iter: e => e.id,
+  }, data);
+  watch('#bb-elements', 'tr', {
+    hover: e => e.id,
+    elt: e => e.id,
+  }, data);
+  watch('#iterations', '.chooser', {
+    hover: e => e.dataset.iter,
+    iter: e => e.dataset.iter,
+  }, data);
+  updateSelection({}, data);
+}
+function applyClassIf(cls, query) {
+  document.querySelectorAll('.' + cls).forEach(elt => elt.classList.remove(cls));
+  document.querySelectorAll(query).forEach(elt => elt.classList.add(cls));
+}
+function classSelector(cls) {
+  if (cls == null) return null;
+  if (Array.isArray(cls)) return cls.map(classSelector).join(', ');
+  var escaped = cls.replace('.', '\\.').replace(':', '\\:');
+  // don't require id="foo" class="foo"
+  return '.' + escaped + ", #" + escaped;
+}
+
+// Add a stylesheet defining colors for n basic blocks.
+function addBBColors(n) {
+  let sheet = new CSSStyleSheet();
+  // hex values to subtract from fff to get a base color
+  options = [0x001, 0x010, 0x011, 0x100, 0x101, 0x110, 0x111];
+  function color(hex) {
+    return "#" + hex.toString(16).padStart(3, "0");
+  }
+  function add(selector, property, hex) {
+    sheet.insertRule(`${selector} { ${property}: ${color(hex)}; }`)
+  }
+  for (var i = 0; i < n; ++i) {
+    let opt = options[i%options.length];
+    add(`.B${i}`, 'background-color', 0xfff - 2*opt);
+    add(`#B${i} polygon`, 'fill', 0xfff - 2*opt);
+    add(`#B${i} polygon`, 'stroke', 0x888 - 4*opt);
+  }
+  document.adoptedStyleSheets.push(sheet);
+}
Index: clang/lib/Analysis/FlowSensitive/HTMLLogger.html
===================================================================
--- /dev/null
+++ clang/lib/Analysis/FlowSensitive/HTMLLogger.html
@@ -0,0 +1,66 @@
+<!doctype html>
+<html>
+
+<head>
+<?INJECT?>
+</head>
+
+<body>
+
+<section id="timeline" data-selection="">
+<header>Timeline</header>
+<template data-for="entry in timeline">
+  <div id="{{entry.block}}:{{entry.iter}}" data-bb="{{entry.block}}" class="entry">{{entry.block}} ({{entry.iter}})</div>
+</template>
+</section>
+
+<section id="function" data-selection="">
+<header>Function</header>
+<div id="code"></div>
+<div id="cfg"></div>
+</section>
+
+<section id="block" data-selection="bb">
+<header><template>Block {{selection.bb}}</template></header>
+<div id="iterations">
+  <template data-for="i in Array(cfg[selection.bb].iters).keys()">
+    <a class="chooser {{selection.bb}}:{{i+1}}" data-iter="{{selection.bb}}:{{i+1}}">Iteration {{i+1}}</a>
+  </template>
+</div>
+<table id="bb-elements">
+<template data-for="elt in cfg[selection.bb].elements">
+  <tr id="{{selection.bb}}.{{elt_index}}">
+    <td class="{{selection.bb}}">{{selection.bb}}.{{elt_index}}</td>
+    <td>{{elt}}</td>
+  </tr>
+</template>
+</table>
+</section>
+
+<section id="element" data-selection="iter,elt">
+<template data-let="state = states[selection.iter + '_' + selection.elt]">
+<header>
+  <template data-if="state.element == 0">{{state.block}} (iteration {{state.iter}}) initial state</template>
+  <template data-if="state.element != 0">Element {{selection.elt}} (iteration {{state.iter}})</template>
+</header>
+<template data-if="state.logs">
+  <h2>Logs</h2>
+  <pre>{{state.logs}}</pre>
+</template>
+<h2>Built-in lattice</h2>
+<pre>{{state.builtinLattice}}</pre>
+</template>
+</section>
+
+<script>
+addBBColors(Object.keys(HTMLLoggerData.cfg).length);
+watchSelection(HTMLLoggerData);
+updateSelection({}, HTMLLoggerData);
+// Copy code and cfg from <template>s into the body.
+for (tmpl of document.querySelectorAll('template[data-copy]'))
+  document.getElementById(tmpl.dataset.copy).replaceChildren(
+      ...tmpl.content.cloneNode(/*deep=*/true).childNodes);
+</script>
+
+</body>
+</html>
Index: clang/lib/Analysis/FlowSensitive/HTMLLogger.css
===================================================================
--- /dev/null
+++ clang/lib/Analysis/FlowSensitive/HTMLLogger.css
@@ -0,0 +1,118 @@
+/*===-- HTMLLogger.css ----------------------------------------------------===
+*
+* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+* See https://llvm.org/LICENSE.txt for license information.
+* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+*
+*===----------------------------------------------------------------------===*/
+html { font-family: sans-serif; }
+body { margin: 0; display: flex; justify-content: left; }
+body > * { box-sizing: border-box; }
+body > section {
+  border: 1px solid black;
+  min-width: 20em;
+  overflow: auto;
+  max-height: 100vh;
+}
+section header {
+  background-color: #008;
+  color: white;
+  font-weight: bold;
+  font-size: large;
+}
+section h2 {
+  font-size: medium;
+  margin-bottom: 0.5em;
+  padding-top: 0.5em;
+  border-top: 1px solid #aaa;
+}
+#timeline {
+  min-width: 0;
+}
+#timeline .entry.hover {
+  background-color: #aaa;
+}
+#timeline .entry.iter-select {
+  background-color: #aac;
+}
+
+#bb-elements {
+  font-family: monospace;
+  font-size: x-small;
+  border-collapse: collapse;
+}
+/* First column of the block's element table (the element label cell). */
+#bb-elements td:nth-child(1) {
+  text-align: right;
+  width: 4em;
+  border-right: 1px solid #008;
+  padding: 0.3em 0.5em;
+
+  font-weight: bold;
+  color: #888;
+}
+#bb-elements tr.hover {
+  background-color: #abc;
+}
+#bb-elements tr.elt-select {
+  background-color: #acf;
+}
+#iterations {
+  display: flex;
+}
+#iterations .chooser {
+  flex-grow: 1;
+  text-align: center;
+}
+#iterations .chooser:not(.iter-select).hover {
+  background-color: #aaa;
+}
+#iterations .iter-select {
+  font-weight: bold;
+  background-color: #ccc;
+}
+#iterations .chooser:not(.iter-select) {
+  text-decoration: underline;
+  color: blue;
+}
+
+code.filename {
+  font-weight: bold;
+  color: black;
+  background-color: #ccc;
+  display: block;
+  text-align: center;
+}
+code.line {
+  display: block;
+  white-space: pre;
+}
+code.line:before { /* line numbers */
+  content: attr(data-line);
+  display: inline-block;
+  width: 2em;
+  text-align: right;
+  padding-right: 2px;
+  background-color: #ccc;
+  border-right: 1px solid #888;
+  margin-right: 8px;
+}
+code.line:has(.bb-select):before {
+  border-right: 4px solid black;
+  margin-right: 5px;
+}
+.c.hover, .bb.hover {
+  filter: saturate(200%) brightness(90%);
+}
+.c.elt-select {
+  box-shadow: inset 0 -4px 2px -2px #a00;
+}
+.bb.bb-select polygon {
+  stroke-width: 4px;
+  filter: brightness(70%) saturate(150%);
+}
+.bb { user-select: none; }
+.bb polygon { fill: white; }
+#cfg {
+  position: relative;
+  margin-left: 0.5em;
+}
Index: clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
===================================================================
--- /dev/null
+++ clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp
@@ -0,0 +1,460 @@
+//===-- HTMLLogger.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HTML logger. Given a directory dir/, we write
+// dir/0.html for the first analysis, etc.
+// These files contain a visualization that allows inspecting the CFG and the
+// state of the analysis at each point.
+// Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded
+// so each output file is self-contained.
+//
+// VIEWS
+//
+// The timeline and function view are always shown. These allow selecting basic
+// blocks, statements within them, and processing iterations (BBs are visited
+// multiple times when e.g. loops are involved).
+// These are written directly into the HTML body.
+//
+// There are also listings of particular basic blocks, and dumps of the state
+// at particular analysis points (i.e. BB2 iteration 3 statement 2).
+// These are only shown when the relevant BB/analysis point is *selected*.
+//
+// DATA AND TEMPLATES
+//
+// The HTML proper is mostly static.
+// The analysis data is in a JSON object HTMLLoggerData which is embedded as
+// a <script> in the <head>.
+// This gets rendered into DOM by a simple template processor which substitutes
+// the data into <template> tags embedded in the HTML. (see inflate() in JS).
+// 
+// SELECTION
+//
+// This is the only real interactive mechanism.
+//
+// At any given time, there are several named selections, e.g.:
+//   bb: B2               (basic block 2 is selected)
+//   elt: B2.4            (statement 4 is selected)
+//   iter: B2:1           (iteration 1 of the basic block is selected)
+//   hover: B3            (hovering over basic block 3)
+//
+// The selection is updated by mouse events: hover by moving the mouse and
+// others by clicking. Elements that are click targets generally have attributes
+// (id or data-foo) that define what they should select.
+// See watchSelection() in JS for the exact logic.
+//
+// When the "bb" selection is set to "B2":
+//   - sections <section data-selection="bb"> get shown
+//   - templates under such sections get re-rendered
+//   - elements with class/id "B2" get class "bb-select"
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
+#include "clang/Analysis/FlowSensitive/DebugSupport.h"
+#include "clang/Analysis/FlowSensitive/Logger.h"
+#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
+#include "clang/Analysis/FlowSensitive/Value.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+// Defines assets: HTMLLogger_{html,js,css}
+#include "HTMLLogger.inc"
+
+namespace clang::dataflow {
+namespace {
+
+void escape(char C, llvm::raw_ostream &OS) {
+  switch (C) {
+  case '<':
+    OS << "&lt;";
+    break;
+  case '&':
+    OS << "&amp;";
+    break;
+  case '"':
+    OS << "&quot;";
+    break;
+  case '\'':
+    OS << "&apos;";
+    break;
+  default:
+    OS << C;
+    break;
+  }
+}
+void escape(llvm::StringRef S, llvm::raw_ostream &OS) {
+  for (char C : S)
+    escape(C, OS);
+}
+// Render a graphviz graph specification to SVG using the `dot` tool.
+llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph);
+
+using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>;
+
+class HTMLLogger : public Logger {
+  StreamFactory Streams;
+  std::unique_ptr<llvm::raw_ostream> OS;
+  std::optional<llvm::json::OStream> JOS;
+
+  const ControlFlowContext *CFG;
+  // Timeline of iterations of CFG block visitation.
+  std::vector<std::pair<const CFGBlock *, unsigned>> Iters;
+  // Number of times each CFG block has been seen.
+  llvm::DenseMap<const CFGBlock *, unsigned> BlockIters;
+  // The messages logged in the current context but not yet written.
+  std::string ContextLogs;
+  // The number of elements we have visited within the current CFG block.
+  unsigned ElementIndex;
+
+public:
+  explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {}
+  void beginAnalysis(const ControlFlowContext &CFG,
+                     TypeErasedDataflowAnalysis &A) override {
+    OS = Streams();
+    this->CFG = &CFG;
+    *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first;
+    *OS << "<style>" << HTMLLogger_css << "</style>\n";
+    *OS << "<script>" << HTMLLogger_js << "</script>\n";
+
+    writeCode();
+    writeCFG();
+
+    *OS << "<script>var HTMLLoggerData = \n";
+    JOS.emplace(*OS, /*Indent=*/2);
+    JOS->objectBegin();
+    JOS->attributeBegin("states");
+    JOS->objectBegin();
+  }
+  // Between beginAnalysis() and endAnalysis() we write all the states for
+  // particular analysis points into the `states` object.
+  void endAnalysis() override {
+    JOS->objectEnd();
+    JOS->attributeEnd();
+
+    JOS->attributeArray("timeline", [&] {
+      for (const auto &E : Iters) {
+        JOS->object([&] {
+          JOS->attribute("block", blockID(E.first->getBlockID()));
+          JOS->attribute("iter", E.second);
+        });
+      }
+    });
+    JOS->attributeObject("cfg", [&] {
+      for (const auto &E : BlockIters)
+        writeBlock(*E.first, E.second);
+    });
+
+    JOS->objectEnd();
+    JOS.reset();
+    *OS << ";\n</script>\n";
+    *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second;
+  }
+
+  void enterBlock(const CFGBlock &B) override {
+    Iters.emplace_back(&B, ++BlockIters[&B]);
+    ElementIndex = 0;
+  }
+  void enterElement(const CFGElement &E) override {
+    ++ElementIndex;
+  }
+
+  static std::string blockID(unsigned Block) {
+    return llvm::formatv("B{0}", Block);
+  }
+  static std::string eltID(unsigned Block, unsigned Element) {
+    return llvm::formatv("B{0}.{1}", Block, Element);
+  }
+  static std::string iterID(unsigned Block, unsigned Iter) {
+    return llvm::formatv("B{0}:{1}", Block, Iter);
+  }
+  static std::string elementIterID(unsigned Block, unsigned Iter,
+                                   unsigned Element) {
+    return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element);
+  }
+
+  // Write the analysis state associated with a particular analysis point.
+  // FIXME: this dump is fairly opaque. We should show:
+  //  - values associated with the current Stmt
+  //  - values associated with its children
+  //  - meaningful names for values
+  //  - which boolean values are implied true/false by the flow condition
+  void recordState(TypeErasedDataflowAnalysisState &State) override {
+    unsigned Block = Iters.back().first->getBlockID();
+    unsigned Iter = Iters.back().second;
+    JOS->attributeObject(elementIterID(Block, Iter, ElementIndex), [&] {
+      JOS->attribute("block", blockID(Block));
+      JOS->attribute("iter", Iter);
+      JOS->attribute("element", ElementIndex);
+      if (!ContextLogs.empty()) {
+        JOS->attribute("logs", ContextLogs);
+        ContextLogs.clear();
+      }
+      {
+        std::string BuiltinLattice;
+        llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice);
+        State.Env.dump(BuiltinLatticeS);
+        JOS->attribute("builtinLattice", BuiltinLattice);
+      }
+    });
+  }
+  void blockConverged() override { logText("Block converged"); }
+
+  void logText(llvm::StringRef S) override {
+    ContextLogs.append(S.begin(), S.end());
+    ContextLogs.push_back('\n');
+  }
+
+private:
+  // Write the CFG block details.
+  // Currently this is just the list of elements in execution order.
+  // FIXME: an AST dump would be a useful view, too.
+  void writeBlock(const CFGBlock &B, unsigned Iters) {
+    JOS->attributeObject(blockID(B.getBlockID()), [&] {
+      JOS->attribute("iters", Iters);
+      JOS->attributeArray("elements", [&] {
+        for (const auto &Elt : B.Elements) {
+          std::string Dump;
+          llvm::raw_string_ostream DumpS(Dump);
+          Elt.dumpToStream(DumpS);
+          JOS->value(Dump);
+        }
+      });
+    });
+  }
+
+  // Write the code of function being examined.
+  // We want to overlay the code with <span>s that mark which BB particular
+  // tokens are associated with, and even which BB element (so that clicking
+  // can select the right element).
+  void writeCode() {
+    if (!CFG->getDecl())
+      return;
+    const auto &AST = CFG->getDecl()->getASTContext();
+    bool Invalid = false;
+
+    // Extract the source code from the original file.
+    // Pretty-printing from the AST would probably be nicer (no macros or
+    // indentation to worry about), but we need the boundaries of particular
+    // AST nodes and the printer doesn't provide this.
+    auto Range = clang::Lexer::makeFileCharRange(
+        CharSourceRange::getTokenRange(CFG->getDecl()->getSourceRange()),
+        AST.getSourceManager(), AST.getLangOpts());
+    if (Range.isInvalid())
+      return;
+    llvm::StringRef Code = clang::Lexer::getSourceText(
+        Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid);
+    if (Invalid)
+      return;
+
+    static constexpr unsigned Missing = -1;
+    // TokenInfo stores the BB and set of elements that a token is part of.
+    struct TokenInfo {
+      // The basic block this is part of.
+      // This is the BB of the stmt with the smallest containing range.
+      unsigned BB = Missing;
+      unsigned BBPriority = 0;
+      // The most specific stmt this is part of (smallest range).
+      unsigned Elt = Missing;
+      unsigned EltPriority = 0;
+      // All stmts this is part of.
+      SmallVector<unsigned> Elts;
+
+      // Mark this token as being part of BB.Elt.
+      // RangeLen is the character length of the element's range, used to
+      // distinguish inner vs outer statements.
+      // For example in `a==0`, token "a" is part of the stmts "a" and "a==0".
+      // However "a" has a smaller range, so is more specific. Clicking on the
+      // token "a" should select the stmt "a".
+      void assign(unsigned BB, unsigned Elt, unsigned RangeLen) {
+        // A worse BB (larger range) => ignore.
+        if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen)
+          return;
+        if (BB != this->BB) {
+          // Switching to a different (more specific) BB: element indices
+          // recorded so far belong to the old BB, so forget them all,
+          // including the "most specific" element.
+          this->BB = BB;
+          Elts.clear();
+          this->Elt = Missing;
+          BBPriority = RangeLen;
+        }
+        BBPriority = std::min(BBPriority, RangeLen);
+        Elts.push_back(Elt);
+        // Remember the range length of the chosen element. If EltPriority were
+        // left at its initial 0, `EltPriority > RangeLen` could never hold and
+        // a smaller (more specific) stmt would never replace the first one.
+        if (this->Elt == Missing || EltPriority > RangeLen) {
+          this->Elt = Elt;
+          EltPriority = RangeLen;
+        }
+      }
+      bool operator==(const TokenInfo &Other) const {
+        return std::tie(BB, Elt, Elts) ==
+               std::tie(Other.BB, Other.Elt, Other.Elts);
+      }
+      // Write the attributes for the <span> on this token.
+      void write(llvm::raw_ostream &OS) const {
+        OS << "class='c";
+        if (BB != Missing)
+          OS << " " << blockID(BB);
+        for (unsigned Elt : Elts)
+          OS << " " << eltID(BB, Elt);
+        OS << "'";
+
+        if (Elt != Missing)
+          OS << " data-elt='" << eltID(BB, Elt) << "'";
+        if (BB != Missing)
+          OS << " data-bb='" << blockID(BB) << "'";
+      }
+    };
+
+    // Construct one TokenInfo per character in a flat array.
+    // This is inefficient (chars in a token all have the same info) but simple.
+    std::vector<TokenInfo> State(Code.size());
+    for (const auto *Block : CFG->getCFG()) {
+      unsigned EltIndex = 0;
+      for (const auto& Elt : *Block) {
+        ++EltIndex;
+        if (const auto S = Elt.getAs<CFGStmt>()) {
+          auto EltRange = clang::Lexer::makeFileCharRange(
+              CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()),
+              AST.getSourceManager(), AST.getLangOpts());
+          if (EltRange.isInvalid())
+            continue;
+          if (EltRange.getBegin() < Range.getBegin() ||
+              EltRange.getEnd() >= Range.getEnd() ||
+              EltRange.getEnd() < Range.getBegin() ||
+              EltRange.getEnd() >= Range.getEnd())
+            continue;
+          unsigned Off = EltRange.getBegin().getRawEncoding() -
+                         Range.getBegin().getRawEncoding();
+          unsigned Len = EltRange.getEnd().getRawEncoding() -
+                         EltRange.getBegin().getRawEncoding();
+          for (unsigned I = 0; I < Len; ++I)
+            State[Off + I].assign(Block->getBlockID(), EltIndex, Len);
+        }
+      }
+    }
+
+    // Finally, write the code with the correct <span>s.
+    unsigned Line =
+        AST.getSourceManager().getSpellingLineNumber(Range.getBegin());
+    *OS << "<template data-copy='code'>\n";
+    *OS << "<code class='filename'>";
+    escape(AST.getSourceManager().getFilename(Range.getBegin()), *OS);
+    *OS << "</code>";
+    *OS << "<code class='line' data-line='" << Line++ << "'>";
+    for (unsigned I = 0; I < Code.size(); ++I) {
+      // Don't actually write a <span> around each character, only break spans
+      // when the TokenInfo changes.
+      bool NeedOpen = I == 0 || !(State[I] == State[I-1]);
+      bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]);
+      if (NeedOpen) {
+        *OS << "<span ";
+        State[I].write(*OS);
+        *OS << ">";
+      }
+      if (Code[I] == '\n')
+        *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>";
+      else
+        escape(Code[I], *OS);
+      if (NeedClose) *OS << "</span>";
+    }
+    *OS << "</code>\n";
+    *OS << "</template>";
+  }
+
+  // Write the CFG diagram, a graph of basic blocks, into a <template>.
+  // Laying out graphs is hard, so we construct a graphviz description and shell
+  // out to `dot` to turn it into an SVG.
+  // If rendering fails, the error message is written in place of the diagram.
+  void writeCFG() {
+    *OS << "<template data-copy='cfg'>\n";
+    if (auto SVG = renderSVG(buildCFGDot(CFG->getCFG()))) {
+      *OS << *SVG;
+    } else {
+      // Error text is not markup, and may contain characters like '<' (one
+      // renderSVG message mentions the "<svg>" tag): escape it for HTML.
+      *OS << "Can't draw CFG: ";
+      escape(toString(SVG.takeError()), *OS);
+    }
+    *OS << "</template>\n";
+  }
+
+  // Produce a graphviz description of a CFG.
+  // The graph has one node per block ID (its `id` attribute is the same
+  // blockID() string used as the node name) and one edge per reachable
+  // successor of each block.
+  static std::string buildCFGDot(const clang::CFG &CFG) {
+    std::string Graph;
+    llvm::raw_string_ostream GraphS(Graph);
+    // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses.
+    GraphS << R"(digraph {
+      tooltip=" "
+      node[class=bb, shape=square, fontname="sans-serif", tooltip=" "]
+      edge[tooltip = " "]
+)";
+    for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I)
+      GraphS << "  " << blockID(I) << " [id=" << blockID(I) << "]\n";
+    for (const auto *Block : CFG) {
+      for (const auto &Succ : Block->succs()) {
+        // A successor that is known to be unreachable has no reachable block:
+        // getReachableBlock() is null. Draw no edge instead of dereferencing.
+        const auto *Target = Succ.getReachableBlock();
+        if (!Target)
+          continue;
+        GraphS << "  " << blockID(Block->getBlockID()) << " -> "
+               << blockID(Target->getBlockID()) << "\n";
+      }
+    }
+    GraphS << "}\n";
+    return Graph;
+  }
+};
+
+// Renders a graphviz description to SVG by running the `dot` program.
+// Nothing interesting here, just subprocess/temp-file plumbing.
+//
+// DotGraph is written to a temporary file that is redirected to dot's stdin;
+// dot's stdout is redirected to a second temporary file, which is read back.
+// Temporary files that were created are removed again on return.
+// Returns dot's output from the "<svg" tag onwards, or an error if `dot` can't
+// be found or run, or if its output can't be read or contains no "<svg" tag.
+llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) {
+  auto Dot = llvm::sys::findProgramByName("dot");
+  if (!Dot)
+    // Like the other errors below, this message carries no "Can't draw CFG"
+    // prefix: writeCFG() adds that prefix when it reports the failure.
+    return llvm::createStringError(Dot.getError(), "'dot' not found on PATH");
+
+  // Create input and output files for `dot` subprocess.
+  // (We create the output file as empty, to reserve the temp filename).
+  llvm::SmallString<256> Input, Output;
+  int InputFD;
+  if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD,
+                                                   Input))
+    return llvm::createStringError(EC, "failed to create `dot` temp input");
+  // The temporary stream writes the graph and closes the descriptor as soon as
+  // this statement ends, before `dot` is started.
+  llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph;
+  auto DeleteInput =
+      llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); });
+  if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output))
+    return llvm::createStringError(EC, "failed to create `dot` temp output");
+  auto DeleteOutput =
+      llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); });
+
+  std::vector<std::optional<llvm::StringRef>> Redirects = {
+      /*stdin=*/Input, /*stdout=*/Output,
+      /*stderr=*/std::nullopt};
+  std::string ErrMsg;
+  int Code = llvm::sys::ExecuteAndWait(
+      *Dot, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects,
+      /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg);
+  // Report a failure message first, then any non-zero exit code.
+  if (!ErrMsg.empty())
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "'dot' failed: " + ErrMsg);
+  if (Code != 0)
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "'dot' failed (" + llvm::Twine(Code) + ")");
+
+  auto Buf = llvm::MemoryBuffer::getFile(Output);
+  if (!Buf)
+    return llvm::createStringError(Buf.getError(), "Can't read `dot` output");
+
+  // Output has <?xml> prefix we don't want. Skip to <svg> tag.
+  llvm::StringRef Result = Buf.get()->getBuffer();
+  auto Pos = Result.find("<svg");
+  if (Pos == llvm::StringRef::npos)
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "Can't find <svg> tag in `dot` output");
+  // Copy into an owning string: Result points into Buf, which dies on return.
+  return Result.substr(Pos).str();
+}
+
+} // namespace
+
+// Definition of the factory declared in Logger.h.
+// Constructs an HTMLLogger, handing it ownership of the stream factory.
+std::unique_ptr<Logger>
+Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) {
+  return std::make_unique<HTMLLogger>(std::move(Streams));
+}
+
+} // namespace clang::dataflow
Index: clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
===================================================================
--- clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
+++ clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
@@ -20,14 +20,17 @@
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
 #include <cassert>
 #include <memory>
 #include <utility>
 
-static llvm::cl::opt<std::string>
-    DataflowLog("dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
-                llvm::cl::desc("Emit log of dataflow analysis. With no arg, "
-                               "writes textual log to stderr."));
+// Hidden -dataflow-log flag. Whether it was passed at all is checked in the
+// DataflowAnalysisContext constructor; its optional value is interpreted by
+// makeLoggerFromCommandLine().
+static llvm::cl::opt<std::string> DataflowLog(
+    "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional,
+    llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual "
+                   "log to stderr. With an arg, writes HTML logs under the "
+                   "specified directory (one per analyzed function)."));
 
 namespace clang {
 namespace dataflow {
@@ -382,6 +385,34 @@
   return nullptr;
 }
 
+// Creates the logger requested by the -dataflow-log flag.
+// With an empty value this is a textual logger on stderr. Otherwise the value
+// names a directory, and every stream handed to the HTML logger is a newly
+// created, numbered .html file inside it.
+static std::unique_ptr<Logger> makeLoggerFromCommandLine() {
+  if (DataflowLog.empty())
+    return Logger::textual(llvm::errs());
+
+  llvm::StringRef LogDir = DataflowLog;
+  // A failure here is only reported; we still try to open log files below.
+  if (std::error_code DirError = llvm::sys::fs::create_directories(LogDir))
+    llvm::errs() << "Failed to create log dir: " << DirError.message() << "\n";
+
+  // Every analysis run in this process logs into the same directory, so file
+  // numbers come from one shared counter and runs don't clobber each other.
+  // (The logger itself is not shared, as it is not threadsafe.)
+  static std::atomic<unsigned> NextFileNumber = {0};
+  return Logger::html(
+      [LogDir(LogDir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> {
+        const unsigned FileNumber = NextFileNumber.fetch_add(1);
+        llvm::SmallString<256> Path(LogDir);
+        llvm::sys::path::append(Path, std::to_string(FileNumber) + ".html");
+
+        std::error_code OpenError;
+        auto File = std::make_unique<llvm::raw_fd_ostream>(Path, OpenError);
+        if (!OpenError)
+          return File;
+        // Report the problem and hand back a null stream instead.
+        llvm::errs() << "Failed to create log " << Path << ": "
+                     << OpenError.message() << "\n";
+        return std::make_unique<llvm::raw_null_ostream>();
+      });
+}
+
 DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S,
                                                  Options Opts)
     : S(std::move(S)), TrueVal(createAtomicBoolValue()),
@@ -392,7 +423,6 @@
   // based tools.
   if (Opts.Log == nullptr) {
     if (DataflowLog.getNumOccurrences() > 0) {
-      LogOwner = Logger::textual(llvm::errs());
+      LogOwner = makeLoggerFromCommandLine();
       this->Opts.Log = LogOwner.get();
-      // FIXME: if the flag is given a value, write an HTML log to a file.
     } else {
Index: clang/lib/Analysis/FlowSensitive/CMakeLists.txt
===================================================================
--- clang/lib/Analysis/FlowSensitive/CMakeLists.txt
+++ clang/lib/Analysis/FlowSensitive/CMakeLists.txt
@@ -2,6 +2,7 @@
   ControlFlowContext.cpp
   DataflowAnalysisContext.cpp
   DataflowEnvironment.cpp
+  HTMLLogger.cpp
   Logger.cpp
   Transfer.cpp
   TypeErasedDataflowAnalysis.cpp
@@ -16,3 +17,14 @@
   )
 
 add_subdirectory(Models)
+
+# Generate HTMLLogger.inc from the HTML logger's web resources.
+# bundle_resources.py writes one string constant per input file, holding that
+# file's contents. Inputs are named relative to this source directory.
+add_custom_command(
+  OUTPUT HTMLLogger.inc
+  COMMAND
+    "${Python3_EXECUTABLE}" ${CLANG_SOURCE_DIR}/utils/bundle_resources.py
+    ${CMAKE_CURRENT_BINARY_DIR}/HTMLLogger.inc
+    HTMLLogger.html HTMLLogger.css HTMLLogger.js
+  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+  DEPENDS
+    ${CLANG_SOURCE_DIR}/utils/bundle_resources.py
+    HTMLLogger.html HTMLLogger.css HTMLLogger.js
+  COMMENT "Bundling HTMLLogger resources"
+  VERBATIM)
+# The library target depends on a custom target that produces the file.
+add_custom_target(clangAnalysisFlowSensitiveResources DEPENDS HTMLLogger.inc)
+add_dependencies(clangAnalysisFlowSensitive clangAnalysisFlowSensitiveResources)
Index: clang/include/clang/Analysis/FlowSensitive/Logger.h
===================================================================
--- clang/include/clang/Analysis/FlowSensitive/Logger.h
+++ clang/include/clang/Analysis/FlowSensitive/Logger.h
@@ -31,6 +31,10 @@
   /// A logger that simply writes messages to the specified ostream in real
   /// time.
   static std::unique_ptr<Logger> textual(llvm::raw_ostream &);
+  /// A logger that builds an HTML UI to inspect the analysis results.
+  /// Each function's analysis is written to a stream obtained from the factory.
+  static std::unique_ptr<Logger>
+      html(std::function<std::unique_ptr<llvm::raw_ostream>()>);
 
   virtual ~Logger() = default;
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to