Needs review.  Jeff reviewed the v1 version of the patch here:
  https://gcc.gnu.org/ml/gcc-patches/2019-12/msg00818.html
requesting a function to be split up, which I did in v4.
See the URLs below for notes on the other changes.

Changed in v5:
- update ChangeLog path
- updated copyright years to include 2020

Changed in v4:
- Remove include of gcc-plugin.h, reworking includes accordingly.
- Wrap everything in #if ENABLE_ANALYZER
- Remove /// comment lines
- Add custom events:
    https://gcc.gnu.org/ml/gcc-patches/2019-12/msg00213.html
- Generalize rewind_info_t to exploded_edge::custom_info_t
    https://gcc.gnu.org/ml/gcc-patches/2019-12/msg00219.html
- Add support for global state:
    https://gcc.gnu.org/ml/gcc-patches/2019-12/msg00217.html
- Show rewind destination for leaks due to longjmp
    https://gcc.gnu.org/ml/gcc-patches/2019-11/msg02029.html
- Split diagnostic_manager::prune_path into subroutines
- Add DISABLE_COPY_AND_ASSIGN (saved_diagnostic);

This patch adds diagnostic_manager and related support classes for
saving, deduplicating, and emitting analyzer diagnostics.

gcc/analyzer/ChangeLog:
        * diagnostic-manager.cc: New file.
        * diagnostic-manager.h: New file.
---
 gcc/analyzer/diagnostic-manager.cc | 1217 ++++++++++++++++++++++++++++
 gcc/analyzer/diagnostic-manager.h  |  137 ++++
 2 files changed, 1354 insertions(+)
 create mode 100644 gcc/analyzer/diagnostic-manager.cc
 create mode 100644 gcc/analyzer/diagnostic-manager.h

diff --git a/gcc/analyzer/diagnostic-manager.cc 
b/gcc/analyzer/diagnostic-manager.cc
new file mode 100644
index 000000000000..0fe30c42254d
--- /dev/null
+++ b/gcc/analyzer/diagnostic-manager.cc
@@ -0,0 +1,1217 @@
+/* Classes for saving, deduplicating, and emitting analyzer diagnostics.
+   Copyright (C) 2019-2020 Free Software Foundation, Inc.
+   Contributed by David Malcolm <dmalc...@redhat.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "pretty-print.h"
+#include "gcc-rich-location.h"
+#include "gimple-pretty-print.h"
+#include "analyzer/analyzer.h"
+#include "analyzer/diagnostic-manager.h"
+#include "analyzer/exploded-graph.h"
+#include "analyzer/checker-path.h"
+
+#if ENABLE_ANALYZER
+
+/* class saved_diagnostic.  */
+
+/* saved_diagnostic's ctor.
+   Take ownership of D and STMT_FINDER.  */
+
+saved_diagnostic::saved_diagnostic (const state_machine *sm,
+                                   const exploded_node *enode,
+                                   const supernode *snode, const gimple *stmt,
+                                   stmt_finder *stmt_finder,
+                                   tree var, state_machine::state_t state,
+                                   pending_diagnostic *d)
+: m_sm (sm), m_enode (enode), m_snode (snode), m_stmt (stmt),
+ /* stmt_finder could be on-stack; we want our own copy that can
+    outlive that.  */
+  m_stmt_finder (stmt_finder ? stmt_finder->clone () : NULL),
+  m_var (var), m_state (state),
+  m_d (d), m_trailing_eedge (NULL)
+{
+  gcc_assert (m_stmt || m_stmt_finder);
+
+  /* We must have an enode in order to be able to look for paths
+     through the exploded_graph to this diagnostic.  */
+  gcc_assert (m_enode);
+}
+
+/* saved_diagnostic's dtor.  */
+
+saved_diagnostic::~saved_diagnostic ()
+{
+  delete m_stmt_finder;
+  delete m_d;
+}
+
+/* class diagnostic_manager.  */
+
+/* diagnostic_manager's ctor.  */
+
+diagnostic_manager::diagnostic_manager (logger *logger, int verbosity)
+: log_user (logger), m_verbosity (verbosity)
+{
+}
+
+/* Queue pending_diagnostic D at ENODE for later emission.  */
+
+void
+diagnostic_manager::add_diagnostic (const state_machine *sm,
+                                   const exploded_node *enode,
+                                   const supernode *snode, const gimple *stmt,
+                                   stmt_finder *finder,
+                                   tree var, state_machine::state_t state,
+                                   pending_diagnostic *d)
+{
+  LOG_FUNC (get_logger ());
+
+  /* We must have an enode in order to be able to look for paths
+     through the exploded_graph to the diagnostic.  */
+  gcc_assert (enode);
+
+  saved_diagnostic *sd
+    = new saved_diagnostic (sm, enode, snode, stmt, finder, var, state, d);
+  m_saved_diagnostics.safe_push (sd);
+  if (get_logger ())
+    log ("adding saved diagnostic %i at SN %i: %qs",
+        m_saved_diagnostics.length () - 1,
+        snode->m_index, d->get_kind ());
+}
+
+/* Queue pending_diagnostic D at ENODE for later emission.  */
+
+void
+diagnostic_manager::add_diagnostic (const exploded_node *enode,
+                                   const supernode *snode, const gimple *stmt,
+                                   stmt_finder *finder,
+                                   pending_diagnostic *d)
+{
+  gcc_assert (enode);
+  add_diagnostic (NULL, enode, snode, stmt, finder, NULL_TREE, 0, d);
+}
+
+/* A class for identifying sets of duplicated pending_diagnostic.
+
+   We want to find the simplest dedupe_candidate amongst those that share a
+   dedupe_key.  */
+
+class dedupe_key
+{
+public:
+  dedupe_key (const saved_diagnostic &sd,
+             const exploded_path &epath)
+  : m_sd (sd), m_stmt (sd.m_stmt)
+  {
+    /* Support deferring the choice of stmt until after an emission path has
+     been built, using an optional stmt_finder.  */
+    if (m_stmt == NULL)
+      {
+       gcc_assert (sd.m_stmt_finder);
+       m_stmt = sd.m_stmt_finder->find_stmt (epath);
+      }
+    gcc_assert (m_stmt);
+  }
+
+  hashval_t hash () const
+  {
+    inchash::hash hstate;
+    hstate.add_ptr (m_stmt);
+    // TODO: m_sd
+    return hstate.end ();
+  }
+  bool operator== (const dedupe_key &other) const
+  {
+    return (m_sd == other.m_sd
+           && m_stmt == other.m_stmt);
+  }
+
+  location_t get_location () const
+  {
+    return m_stmt->location;
+  }
+
+  /* A qsort comparator for use by dedupe_winners::emit_best
+     to sort them into location_t order.  */
+
+  static int
+  comparator (const void *p1, const void *p2)
+  {
+    const dedupe_key *pk1 = *(const dedupe_key * const *)p1;
+    const dedupe_key *pk2 = *(const dedupe_key * const *)p2;
+
+    location_t loc1 = pk1->get_location ();
+    location_t loc2 = pk2->get_location ();
+
+    return linemap_compare_locations (line_table, loc2, loc1);
+  }
+
+  const saved_diagnostic &m_sd;
+  const gimple *m_stmt;
+};
+
+/* The value of a slot for a dedupe_key within dedupe_winners:
+   the exploded_path for the best candidate for that key, and the
+   number of duplicates seen so far.  */
+
+class dedupe_candidate
+{
+public:
+  // has the exploded_path
+  dedupe_candidate (const shortest_exploded_paths &sp,
+                   const saved_diagnostic &sd)
+  : m_epath (sp.get_shortest_path (sd.m_enode)),
+    m_num_dupes (0)
+  {
+  }
+
+  unsigned length () const { return m_epath.length (); }
+  const exploded_path &get_path () const { return m_epath; }
+
+  void add_duplicate () { m_num_dupes++; }
+  int get_num_dupes () const { return m_num_dupes; }
+
+private:
+  exploded_path m_epath;
+public:
+  int m_num_dupes;
+};
+
+/* Traits for use by dedupe_winners.  */
+
+class dedupe_hash_map_traits
+{
+public:
+  typedef const dedupe_key *key_type;
+  typedef dedupe_candidate *value_type;
+  typedef dedupe_candidate *compare_type;
+
+  static inline hashval_t hash (const key_type &v)
+  {
+    return v->hash ();
+  }
+  static inline bool equal_keys (const key_type &k1, const key_type &k2)
+  {
+    return *k1 == *k2;
+  }
+  template <typename T>
+  static inline void remove (T &)
+  {
+    // TODO
+  }
+  template <typename T>
+  static inline void mark_deleted (T &entry)
+  {
+    entry.m_key = reinterpret_cast<key_type> (1);
+  }
+  template <typename T>
+  static inline void mark_empty (T &entry)
+  {
+    entry.m_key = NULL;
+  }
+  template <typename T>
+  static inline bool is_deleted (const T &entry)
+  {
+    return entry.m_key == reinterpret_cast<key_type> (1);
+  }
+  template <typename T>
+  static inline bool is_empty (const T &entry)
+  {
+    return entry.m_key == NULL;
+  }
+
+};
+
+/* A class for deduplicating diagnostics and finding (and emitting) the
+   best diagnostic within each partition.  */
+
+class dedupe_winners
+{
+public:
+  ~dedupe_winners ()
+  {
+    /* Delete all keys and candidates.  */
+    for (map_t::iterator iter = m_map.begin ();
+        iter != m_map.end ();
+        ++iter)
+      {
+       delete (*iter).first;
+       delete (*iter).second;
+      }
+  }
+
+  /* Determine an exploded_path for SD using SP and, if it's feasible,
+     determine if it's the best seen so far for its dedupe_key.
+     Retain the winner for each dedupe_key, and discard the rest.  */
+
+  void add (logger *logger,
+           const shortest_exploded_paths &sp,
+           const saved_diagnostic &sd)
+  {
+    /* Build a dedupe_candidate for SD.
+       This uses SP to build an exploded_path.  */
+    dedupe_candidate *dc = new dedupe_candidate (sp, sd);
+
+    /* Verify that the epath is feasible.
+       State-merging means that not every path in the epath corresponds
+       to a feasible one w.r.t. states.
+       Here we simply check each duplicate saved_diagnostic's
+       shortest_path, and reject any that aren't feasible.
+       This could introduce false negatives, as there could be longer
+       feasible paths within the egraph.  */
+    if (logger)
+      logger->log ("considering %qs at SN: %i",
+                  sd.m_d->get_kind (), sd.m_snode->m_index);
+    if (!dc->get_path ().feasible_p (logger))
+      {
+       if (logger)
+         logger->log ("rejecting %qs at SN: %i"
+                      " due to infeasible path",
+                      sd.m_d->get_kind (), sd.m_snode->m_index);
+       delete dc;
+       return;
+      }
+    else
+      if (logger)
+       logger->log ("accepting %qs at SN: %i with feasible path",
+                    sd.m_d->get_kind (), sd.m_snode->m_index);
+
+    dedupe_key *key = new dedupe_key (sd, dc->get_path ());
+    if (dedupe_candidate **slot = m_map.get (key))
+      {
+       (*slot)->add_duplicate ();
+
+       if (dc->length () < (*slot)->length ())
+         {
+           /* We've got a shorter path for the key; replace
+              the current candidate.  */
+           dc->m_num_dupes = (*slot)->get_num_dupes ();
+           delete *slot;
+           *slot = dc;
+         }
+       else
+         /* We haven't beaten the current best candidate;
+            drop the new candidate.  */
+         delete dc;
+       delete key;
+      }
+    else
+      /* This is the first candidate for this key.  */
+      m_map.put (key, dc);
+  }
+
+ /* Emit the simplest diagnostic within each set.  */
+
+  void emit_best (diagnostic_manager *dm,
+                 const exploded_graph &eg)
+  {
+    LOG_SCOPE (dm->get_logger ());
+
+    /* Get keys into a vec for sorting.  */
+    auto_vec<const dedupe_key *> keys (m_map.elements ());
+    for (map_t::iterator iter = m_map.begin ();
+        iter != m_map.end ();
+        ++iter)
+      keys.quick_push ((*iter).first);
+
+    dm->log ("# keys after de-duplication: %i", keys.length ());
+
+    /* Sort into a good emission order.  */
+    keys.qsort (dedupe_key::comparator);
+
+    /* Emit the best candidate for each key.  */
+    int i;
+    const dedupe_key *key;
+    FOR_EACH_VEC_ELT (keys, i, key)
+      {
+       dedupe_candidate **slot = m_map.get (key);
+       gcc_assert (*slot);
+       const dedupe_candidate &dc = **slot;
+
+       dm->emit_saved_diagnostic (eg, key->m_sd,
+                                  dc.get_path (), key->m_stmt,
+                                  dc.get_num_dupes ());
+      }
+  }
+
+private:
+
+  /* This maps from each dedupe_key to a current best dedupe_candidate.  */
+
+  typedef hash_map<const dedupe_key *, dedupe_candidate *,
+                  dedupe_hash_map_traits> map_t;
+  map_t m_map;
+};
+
+/* Emit all saved diagnostics.  */
+
+void
+diagnostic_manager::emit_saved_diagnostics (const exploded_graph &eg)
+{
+  LOG_SCOPE (get_logger ());
+  auto_timevar tv (TV_ANALYZER_DIAGNOSTICS);
+  log ("# saved diagnostics: %i", m_saved_diagnostics.length ());
+
+  if (m_saved_diagnostics.length () == 0)
+    return;
+
+  /* Compute the shortest_paths once, sharing it between all diagnostics.  */
+  shortest_exploded_paths sp (eg, eg.get_origin ());
+
+  /* Iterate through all saved diagnostics, adding them to a dedupe_winners
+     instance.  This partitions the saved diagnostics by dedupe_key,
+     generating exploded_paths for them, and retaining the best one in each
+     partition.  */
+  dedupe_winners best_candidates;
+
+  int i;
+  saved_diagnostic *sd;
+  FOR_EACH_VEC_ELT (m_saved_diagnostics, i, sd)
+    best_candidates.add (get_logger (), sp, *sd);
+
+  /* For each dedupe-key, call emit_saved_diagnostic on the "best"
+     saved_diagnostic.  */
+  best_candidates.emit_best (this, eg);
+}
+
+/* Given a saved_diagnostic SD at STMT with feasible path EPATH through EG,
+   create an checker_path of suitable events and use it to call
+   SD's underlying pending_diagnostic "emit" vfunc to emit a diagnostic.  */
+
+void
+diagnostic_manager::emit_saved_diagnostic (const exploded_graph &eg,
+                                          const saved_diagnostic &sd,
+                                          const exploded_path &epath,
+                                          const gimple *stmt,
+                                          int num_dupes)
+{
+  LOG_SCOPE (get_logger ());
+  log ("sd: %qs at SN: %i", sd.m_d->get_kind (), sd.m_snode->m_index);
+  log ("num dupes: %i", num_dupes);
+
+  pretty_printer *pp = global_dc->printer->clone ();
+
+  checker_path emission_path;
+
+  /* Populate emission_path with a full description of EPATH.  */
+  build_emission_path (eg, epath, &emission_path);
+
+  /* Now prune it to just cover the most pertinent events.  */
+  prune_path (&emission_path, sd.m_sm, sd.m_var, sd.m_state);
+
+  /* Add a final event to the path, covering the diagnostic itself.
+     We use the final enode from the epath, which might be different from
+     the sd.m_enode, as the dedupe code doesn't care about enodes, just
+     snodes.  */
+  emission_path.add_final_event (sd.m_sm, epath.get_final_enode (), stmt,
+                                sd.m_var, sd.m_state);
+
+  /* The "final" event might not be final; if the saved_diagnostic has a
+     trailing eedge stashed, add any events for it.  This is for use
+     in handling longjmp, to show where a longjmp is rewinding to.  */
+  if (sd.m_trailing_eedge)
+    add_events_for_eedge (*sd.m_trailing_eedge, eg.get_ext_state (),
+                         &emission_path);
+
+  emission_path.prepare_for_emission (sd.m_d);
+
+  gcc_rich_location rich_loc (stmt->location);
+  rich_loc.set_path (&emission_path);
+
+  auto_diagnostic_group d;
+  auto_cfun sentinel (sd.m_snode->m_fun);
+  if (sd.m_d->emit (&rich_loc))
+    {
+      if (num_dupes > 0)
+       inform_n (stmt->location, num_dupes,
+                 "%i duplicate", "%i duplicates",
+                 num_dupes);
+    }
+  delete pp;
+}
+
+/* Given a state change to DST_REP, determine a tree that gives the origin
+   of that state at STMT, using DST_STATE's region model, so that state
+   changes based on assignments can be tracked back to their origins.
+
+   For example, if we have
+
+     (S1) _1 = malloc (64);
+     (S2) EXPR = _1;
+
+   then at stmt S2 we can get the origin of EXPR's state as being _1,
+   and thus track the allocation back to S1.  */
+
+static tree
+get_any_origin (const gimple *stmt,
+               tree dst_rep,
+               const program_state &dst_state)
+{
+  if (!stmt)
+    return NULL_TREE;
+
+  gcc_assert (dst_rep);
+
+  if (const gassign *assign = dyn_cast <const gassign *> (stmt))
+    {
+      tree lhs = gimple_assign_lhs (assign);
+      /* Use region IDs to compare lhs with DST_REP.  */
+      if (dst_state.m_region_model->get_lvalue (lhs, NULL)
+         == dst_state.m_region_model->get_lvalue (dst_rep, NULL))
+       {
+         tree rhs1 = gimple_assign_rhs1 (assign);
+         enum tree_code op = gimple_assign_rhs_code (assign);
+         switch (op)
+           {
+           default:
+             //gcc_unreachable ();  // TODO
+             break;
+           case COMPONENT_REF:
+           case SSA_NAME:
+             return rhs1;
+           }
+       }
+    }
+  return NULL_TREE;
+}
+
+/* Emit a "path" of events to EMISSION_PATH describing the exploded path
+   EPATH within EG.  */
+
+void
+diagnostic_manager::build_emission_path (const exploded_graph &eg,
+                                        const exploded_path &epath,
+                                        checker_path *emission_path) const
+{
+  LOG_SCOPE (get_logger ());
+  const extrinsic_state &ext_state = eg.get_ext_state ();
+  for (unsigned i = 0; i < epath.m_edges.length (); i++)
+    {
+      const exploded_edge *eedge = epath.m_edges[i];
+      add_events_for_eedge (*eedge, ext_state, emission_path);
+    }
+}
+
+/* Subclass of state_change_visitor that creates state_change_event
+   instances.  */
+
+class state_change_event_creator : public state_change_visitor
+{
+public:
+  state_change_event_creator (const exploded_edge &eedge,
+                             checker_path *emission_path)
+    : m_eedge (eedge),
+      m_emission_path (emission_path)
+  {}
+
+  bool on_global_state_change (const state_machine &sm,
+                              state_machine::state_t src_sm_val,
+                              state_machine::state_t dst_sm_val)
+    FINAL OVERRIDE
+  {
+    const exploded_node *src_node = m_eedge.m_src;
+    const program_point &src_point = src_node->get_point ();
+    const int src_stack_depth = src_point.get_stack_depth ();
+    const exploded_node *dst_node = m_eedge.m_dest;
+    const gimple *stmt = src_point.get_stmt ();
+    const supernode *supernode = src_point.get_supernode ();
+    const program_state &dst_state = dst_node->get_state ();
+
+    int stack_depth = src_stack_depth;
+
+    m_emission_path->add_event (new state_change_event (supernode,
+                                                       stmt,
+                                                       stack_depth,
+                                                       sm,
+                                                       NULL_TREE,
+                                                       src_sm_val,
+                                                       dst_sm_val,
+                                                       NULL_TREE,
+                                                       dst_state));
+    return false;
+  }
+
+  bool on_state_change (const state_machine &sm,
+                       state_machine::state_t src_sm_val,
+                       state_machine::state_t dst_sm_val,
+                       tree dst_rep,
+                       svalue_id dst_origin_sid) FINAL OVERRIDE
+  {
+    const exploded_node *src_node = m_eedge.m_src;
+    const program_point &src_point = src_node->get_point ();
+    const int src_stack_depth = src_point.get_stack_depth ();
+    const exploded_node *dst_node = m_eedge.m_dest;
+    const gimple *stmt = src_point.get_stmt ();
+    const supernode *supernode = src_point.get_supernode ();
+    const program_state &dst_state = dst_node->get_state ();
+
+    int stack_depth = src_stack_depth;
+
+    if (m_eedge.m_sedge
+       && m_eedge.m_sedge->m_kind == SUPEREDGE_CFG_EDGE)
+      {
+       supernode = src_point.get_supernode ();
+       stmt = supernode->get_last_stmt ();
+       stack_depth = src_stack_depth;
+      }
+
+    /* Bulletproofing for state changes at calls/returns;
+       TODO: is there a better way? */
+    if (!stmt)
+      return false;
+
+    tree origin_rep
+      = dst_state.get_representative_tree (dst_origin_sid);
+
+    if (origin_rep == NULL_TREE)
+      origin_rep = get_any_origin (stmt, dst_rep, dst_state);
+    m_emission_path->add_event (new state_change_event (supernode,
+                                                       stmt,
+                                                       stack_depth,
+                                                       sm,
+                                                       dst_rep,
+                                                       src_sm_val,
+                                                       dst_sm_val,
+                                                       origin_rep,
+                                                       dst_state));
+    return false;
+  }
+
+  const exploded_edge &m_eedge;
+  checker_path *m_emission_path;
+};
+
+/* Compare SRC_STATE and DST_STATE (which use EXT_STATE), and call
+   VISITOR's on_state_change for every sm-state change that occurs
+   to a tree, and on_global_state_change for every global state change
+   that occurs.
+
+   This determines the state changes that ought to be reported to
+   the user: a combination of the effects of changes to sm_state_map
+   (which maps svalues to sm-states), and of region_model changes
+   (which map trees to svalues).
+
+   Bail out early and return true if any call to on_global_state_change
+   or on_state_change returns true, otherwise return false.
+
+   This is split out to make it easier to experiment with changes to
+   exploded_node granularity (so that we can observe what state changes
+   lead to state_change_events being emitted).  */
+
+bool
+for_each_state_change (const program_state &src_state,
+                      const program_state &dst_state,
+                      const extrinsic_state &ext_state,
+                      state_change_visitor *visitor)
+{
+  gcc_assert (src_state.m_checker_states.length ()
+             == ext_state.m_checkers.length ());
+  gcc_assert (dst_state.m_checker_states.length ()
+             == ext_state.m_checkers.length ());
+  for (unsigned i = 0; i < ext_state.m_checkers.length (); i++)
+    {
+      const state_machine &sm = ext_state.get_sm (i);
+      const sm_state_map &src_smap = *src_state.m_checker_states[i];
+      const sm_state_map &dst_smap = *dst_state.m_checker_states[i];
+
+      /* Add events for any global state changes.  */
+      if (src_smap.get_global_state () != dst_smap.get_global_state ())
+       if (visitor->on_global_state_change (sm,
+                                            src_smap.get_global_state (),
+                                            dst_smap.get_global_state ()))
+         return true;
+
+      /* Add events for per-svalue state changes.  */
+      for (sm_state_map::iterator_t iter = dst_smap.begin ();
+          iter != dst_smap.end ();
+          ++iter)
+       {
+         /* Ideally we'd directly compare the SM state between src state
+            and dst state, but there's no guarantee that the IDs can
+            be meaningfully compared.  */
+         svalue_id dst_sid = (*iter).first;
+         state_machine::state_t dst_sm_val = (*iter).second.m_state;
+
+         auto_vec<path_var> dst_pvs;
+         dst_state.m_region_model->get_path_vars_for_svalue (dst_sid,
+                                                             &dst_pvs);
+
+         unsigned j;
+         path_var *dst_pv;
+         FOR_EACH_VEC_ELT (dst_pvs, j, dst_pv)
+           {
+             tree dst_rep = dst_pv->m_tree;
+             gcc_assert (dst_rep);
+             if (dst_pv->m_stack_depth
+                 >= src_state.m_region_model->get_stack_depth ())
+               continue;
+             svalue_id src_sid
+               = src_state.m_region_model->get_rvalue (*dst_pv, NULL);
+             if (src_sid.null_p ())
+               continue;
+             state_machine::state_t src_sm_val = src_smap.get_state (src_sid);
+             if (dst_sm_val != src_sm_val)
+               {
+                 svalue_id dst_origin_sid = (*iter).second.m_origin;
+                 if (visitor->on_state_change (sm, src_sm_val, dst_sm_val,
+                                               dst_rep, dst_origin_sid))
+                   return true;
+               }
+           }
+       }
+    }
+  return false;
+}
+
+/* Subroutine of diagnostic_manager::build_emission_path.
+   Add any events for EEDGE to EMISSION_PATH.  */
+
+void
+diagnostic_manager::add_events_for_eedge (const exploded_edge &eedge,
+                                         const extrinsic_state &ext_state,
+                                         checker_path *emission_path) const
+{
+  const exploded_node *src_node = eedge.m_src;
+  const program_point &src_point = src_node->get_point ();
+  const exploded_node *dst_node = eedge.m_dest;
+  const program_point &dst_point = dst_node->get_point ();
+  const int dst_stack_depth = dst_point.get_stack_depth ();
+  if (get_logger ())
+    {
+      get_logger ()->start_log_line ();
+      pretty_printer *pp = get_logger ()->get_printer ();
+      pp_printf (pp, "EN %i -> EN %i: ",
+                eedge.m_src->m_index,
+                eedge.m_dest->m_index);
+      src_point.print (pp, format (false));
+      pp_string (pp, "-> ");
+      dst_point.print (pp, format (false));
+      get_logger ()->end_log_line ();
+    }
+  const program_state &src_state = src_node->get_state ();
+  const program_state &dst_state = dst_node->get_state ();
+
+  /* Add state change events for the states that have changed.
+     We add these before events for superedges, so that if we have a
+     state_change_event due to following an edge, we'll get this sequence
+     of events:
+
+      | if (!ptr)
+      |    ~
+      |    |
+      |    (1) assuming 'ptr' is non-NULL  (state_change_event)
+      |    (2) following 'false' branch... (start_cfg_edge_event)
+     ...
+      | do_something (ptr);
+      | ~~~~~~~~~~~~~^~~~~
+      |              |
+      |              (3) ...to here        (end_cfg_edge_event).  */
+  state_change_event_creator visitor (eedge, emission_path);
+  for_each_state_change (src_state, dst_state, ext_state,
+                        &visitor);
+
+  /* Allow non-standard edges to add events, e.g. when rewinding from
+     longjmp to a setjmp.  */
+  if (eedge.m_custom_info)
+    eedge.m_custom_info->add_events_to_path (emission_path, eedge);
+
+  /* Add events for superedges, function entries, and for statements.  */
+  switch (dst_point.get_kind ())
+    {
+    default:
+      break;
+    case PK_BEFORE_SUPERNODE:
+      if (src_point.get_kind () == PK_AFTER_SUPERNODE)
+       {
+         if (eedge.m_sedge)
+           add_events_for_superedge (eedge, emission_path);
+       }
+      /* Add function entry events.  */
+      if (dst_point.get_supernode ()->entry_p ())
+       {
+         emission_path->add_event
+           (new function_entry_event
+            (dst_point.get_supernode ()->get_start_location (),
+             dst_point.get_fndecl (),
+             dst_stack_depth));
+       }
+      break;
+    case PK_BEFORE_STMT:
+      {
+       const gimple *stmt = dst_point.get_stmt ();
+       if (is_setjmp_call_p (stmt))
+         emission_path->add_event
+           (new setjmp_event (stmt->location,
+                              dst_node,
+                              dst_point.get_fndecl (),
+                              dst_stack_depth));
+       else
+         emission_path->add_event
+           (new statement_event (stmt,
+                                 dst_point.get_fndecl (),
+                                 dst_stack_depth, dst_state));
+      }
+      break;
+    }
+}
+
+/* Subroutine of diagnostic_manager::add_events_for_eedge
+   where EEDGE has an underlying superedge i.e. a CFG edge,
+   or an interprocedural call/return.
+   Add any events for the superedge to EMISSION_PATH.  */
+
+void
+diagnostic_manager::add_events_for_superedge (const exploded_edge &eedge,
+                                             checker_path *emission_path)
+  const
+{
+  gcc_assert (eedge.m_sedge);
+
+  const exploded_node *src_node = eedge.m_src;
+  const program_point &src_point = src_node->get_point ();
+  const exploded_node *dst_node = eedge.m_dest;
+  const program_point &dst_point = dst_node->get_point ();
+  const int src_stack_depth = src_point.get_stack_depth ();
+  const int dst_stack_depth = dst_point.get_stack_depth ();
+  const gimple *last_stmt = src_point.get_supernode ()->get_last_stmt ();
+
+  switch (eedge.m_sedge->m_kind)
+    {
+    case SUPEREDGE_CFG_EDGE:
+      {
+       emission_path->add_event
+         (new start_cfg_edge_event (eedge,
+                              (last_stmt
+                               ? last_stmt->location
+                               : UNKNOWN_LOCATION),
+                              src_point.get_fndecl (),
+                              src_stack_depth));
+       emission_path->add_event
+         (new end_cfg_edge_event (eedge,
+                                  dst_point.get_supernode 
()->get_start_location (),
+                                  dst_point.get_fndecl (),
+                                  dst_stack_depth));
+      }
+      break;
+
+    case SUPEREDGE_CALL:
+      {
+       emission_path->add_event
+         (new call_event (eedge,
+                          (last_stmt
+                           ? last_stmt->location
+                           : UNKNOWN_LOCATION),
+                          src_point.get_fndecl (),
+                          src_stack_depth));
+      }
+      break;
+
+    case SUPEREDGE_INTRAPROCEDURAL_CALL:
+      {
+       /* TODO: add a subclass for this, or generate events for the
+          summary.  */
+       emission_path->add_event
+         (new debug_event ((last_stmt
+                            ? last_stmt->location
+                            : UNKNOWN_LOCATION),
+                           src_point.get_fndecl (),
+                           src_stack_depth,
+                           "call summary"));
+      }
+      break;
+
+    case SUPEREDGE_RETURN:
+      {
+       const return_superedge *return_edge
+         = as_a <const return_superedge *> (eedge.m_sedge);
+
+       const gcall *call_stmt = return_edge->get_call_stmt ();
+       emission_path->add_event
+         (new return_event (eedge,
+                            (call_stmt
+                             ? call_stmt->location
+                             : UNKNOWN_LOCATION),
+                            dst_point.get_fndecl (),
+                            dst_stack_depth));
+      }
+      break;
+    }
+}
+
+/* Prune PATH, based on the verbosity level, to the most pertinent
+   events for a diagnostic that involves VAR ending in state STATE
+   (for state machine SM).
+
+   PATH is updated in place, and the redundant checker_events are deleted.
+
+   As well as deleting events, call record_critical_state on events in
+   which state critical to the pending_diagnostic is being handled; see
+   the comment for diagnostic_manager::prune_for_sm_diagnostic.  */
+
+void
+diagnostic_manager::prune_path (checker_path *path,
+                               const state_machine *sm,
+                               tree var,
+                               state_machine::state_t state) const
+{
+  LOG_FUNC (get_logger ());
+  path->maybe_log (get_logger (), "path");
+  prune_for_sm_diagnostic (path, sm, var, state);
+  prune_interproc_events (path);
+  finish_pruning (path);
+  path->maybe_log (get_logger (), "pruned");
+}
+
+/* First pass of diagnostic_manager::prune_path: apply verbosity level,
+   pruning unrelated state change events.
+
+   Iterate backwards through PATH, skipping state change events that aren't
+   VAR but update the pertinent VAR when state-copying occurs.
+
+   As well as deleting events, call record_critical_state on events in
+   which state critical to the pending_diagnostic is being handled, so
+   that the event's get_desc vfunc can potentially supply a more precise
+   description of the event to the user.
+   e.g. improving
+     "calling 'foo' from 'bar'"
+   to
+     "passing possibly-NULL pointer 'ptr' to 'foo' from 'bar' as param 1"
+   when the diagnostic relates to later dereferencing 'ptr'.  */
+
+void
+diagnostic_manager::prune_for_sm_diagnostic (checker_path *path,
+                                            const state_machine *sm,
+                                            tree var,
+                                            state_machine::state_t state) const
+{
+  int idx = path->m_events.length () - 1;
+  while (idx >= 0 && idx < (signed)path->m_events.length ())
+    {
+      checker_event *base_event = path->m_events[idx];
+      if (get_logger ())
+       {
+         if (sm)
+           {
+             if (var)
+               log ("considering event %i, with var: %qE, state: %qs",
+                    idx, var, sm->get_state_name (state));
+             else
+               log ("considering event %i, with global state: %qs",
+                    idx, sm->get_state_name (state));
+           }
+         else
+           log ("considering event %i", idx);
+       }
+      switch (base_event->m_kind)
+       {
+       default:
+         gcc_unreachable ();
+
+       case EK_DEBUG:
+         if (m_verbosity < 3)
+           {
+             log ("filtering event %i: debug event", idx);
+             path->delete_event (idx);
+           }
+         break;
+
+       case EK_CUSTOM:
+         /* Don't filter custom events.  */
+         break;
+
+       case EK_STMT:
+         {
+           /* If this stmt is the origin of "var", update var.  */
+           if (var)
+             {
+               statement_event *stmt_event = (statement_event *)base_event;
+               tree new_var = get_any_origin (stmt_event->m_stmt, var,
+                                              stmt_event->m_dst_state);
+               if (new_var)
+                 {
+                   log ("event %i: switching var of interest from %qE to %qE",
+                        idx, var, new_var);
+                   var = new_var;
+                 }
+             }
+           if (m_verbosity < 3)
+             {
+               log ("filtering event %i: statement event", idx);
+               path->delete_event (idx);
+             }
+         }
+         break;
+
+       case EK_FUNCTION_ENTRY:
+         if (m_verbosity < 1)
+           {
+             log ("filtering event %i: function entry", idx);
+             path->delete_event (idx);
+           }
+         break;
+
+       case EK_STATE_CHANGE:
+         {
+           state_change_event *state_change = (state_change_event *)base_event;
+           if (state_change->get_lvalue (state_change->m_var)
+               == state_change->get_lvalue (var))
+             {
+               if (state_change->m_origin)
+                 {
+                   log ("event %i: switching var of interest from %qE to %qE",
+                        idx, var, state_change->m_origin);
+                   var = state_change->m_origin;
+                 }
+               log ("event %i: switching state of interest from %qs to %qs",
+                    idx, sm->get_state_name (state_change->m_to),
+                    sm->get_state_name (state_change->m_from));
+               state = state_change->m_from;
+             }
+           else if (m_verbosity < 3)
+             {
+               if (var)
+                 log ("filtering event %i:"
+                      " state change to %qE unrelated to %qE",
+                      idx, state_change->m_var, var);
+               else
+                 log ("filtering event %i: state change to %qE",
+                      idx, state_change->m_var);
+               path->delete_event (idx);
+             }
+         }
+         break;
+
+       case EK_START_CFG_EDGE:
+         {
+           cfg_edge_event *event = (cfg_edge_event *)base_event;
+           const cfg_superedge& cfg_superedge
+             = event->get_cfg_superedge ();
+           const supernode *dest = event->m_sedge->m_dest;
+           /* Do we have an SSA_NAME defined via a phi node in
+              the dest CFG node?  */
+           if (var && TREE_CODE (var) == SSA_NAME)
+             if (SSA_NAME_DEF_STMT (var)->bb == dest->m_bb)
+               {
+                 if (gphi *phi
+                     = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (var)))
+                   {
+                     /* Update var based on its phi node.  */
+                     tree old_var = var;
+                     var = cfg_superedge.get_phi_arg (phi);
+                     log ("updating from %qE to %qE based on phi node",
+                          old_var, var);
+                     if (get_logger ())
+                       {
+                         pretty_printer pp;
+                         pp_gimple_stmt_1 (&pp, phi, 0, (dump_flags_t)0);
+                         log ("  phi: %s", pp_formatted_text (&pp));
+                       }
+                   }
+               }
+
+           /* TODO: is this edge significant to var?
+              See if var can be in other states in the dest, but not
+              in other states in the src?
+              Must have multiple sibling edges.  */
+
+           if (event->should_filter_p (m_verbosity))
+             {
+               log ("filtering event %i: CFG edge", idx);
+               path->delete_event (idx);
+               /* Also delete the corresponding EK_END_CFG_EDGE.  */
+               gcc_assert (path->m_events[idx]->m_kind == EK_END_CFG_EDGE);
+               path->delete_event (idx);
+             }
+         }
+         break;
+
+       case EK_END_CFG_EDGE:
+         /* These come in pairs with EK_START_CFG_EDGE events and are
+            filtered when their start event is filtered.  */
+         break;
+
+       case EK_CALL_EDGE:
+         {
+           call_event *event = (call_event *)base_event;
+           const callgraph_superedge& cg_superedge
+             = event->get_callgraph_superedge ();
+           callsite_expr expr;
+           tree caller_var
+             = cg_superedge.map_expr_from_callee_to_caller (var, &expr);
+           if (caller_var)
+             {
+               log ("event %i:"
+                    " switching var of interest"
+                    " from %qE in callee to %qE in caller",
+                    idx, var, caller_var);
+               var = caller_var;
+               if (expr.param_p ())
+                 event->record_critical_state (var, state);
+             }
+         }
+         break;
+
+       case EK_RETURN_EDGE:
+         // TODO: potentially update var/state based on return value,
+         // args etc
+         {
+           if (var)
+             {
+               return_event *event = (return_event *)base_event;
+               const callgraph_superedge& cg_superedge
+                 = event->get_callgraph_superedge ();
+               callsite_expr expr;
+               tree callee_var
+                 = cg_superedge.map_expr_from_caller_to_callee (var, &expr);
+               if (callee_var)
+                 {
+                   log ("event %i:"
+                        " switching var of interest"
+                        " from %qE in caller to %qE in callee",
+                        idx, var, callee_var);
+                   var = callee_var;
+                   if (expr.return_value_p ())
+                     event->record_critical_state (var, state);
+                 }
+             }
+         }
+         break;
+
+       case EK_SETJMP:
+         /* TODO: only show setjmp_events that matter i.e. those for which
+            there is a later rewind event using them.  */
+       case EK_REWIND_FROM_LONGJMP:
+       case EK_REWIND_TO_SETJMP:
+         break;
+
+       case EK_WARNING:
+         /* Always show the final "warning" event in the path.  */
+         break;
+       }
+      idx--;
+    }
+}
+
+/* Second pass of diagnostic_manager::prune_path: remove redundant
+   interprocedural information.
+
+   For example, given:
+     (1)- calling "f2" from "f1"
+     (2)--- entry to "f2"
+     (3)--- calling "f3" from "f2"
+     (4)----- entry to "f3"
+     (5)--- returning to "f2" to "f3"
+     (6)- returning to "f1" to "f2"
+   with no other intervening events, then none of these events are
+   likely to be interesting to the user.
+
+   Prune [..., call, function-entry, return, ...] triples repeatedly
+   until nothing has changed.  For the example above, this would
+   remove events (3, 4, 5), and then remove events (1, 2, 6).  */
+
+void
+diagnostic_manager::prune_interproc_events (checker_path *path) const
+{
+  bool changed = false;
+  do
+    {
+      changed = false;
+      int idx = path->m_events.length () - 1;
+      while (idx >= 0)
+       {
+         /* Prune [..., call, function-entry, return, ...] triples.  */
+         if (idx + 2 < (signed)path->m_events.length ()
+             && path->m_events[idx]->is_call_p ()
+             && path->m_events[idx + 1]->is_function_entry_p ()
+             && path->m_events[idx + 2]->is_return_p ())
+           {
+             if (get_logger ())
+               {
+                 label_text desc (path->m_events[idx]->get_desc (false));
+                 log ("filtering events %i-%i:"
+                      " irrelevant call/entry/return: %s",
+                      idx, idx + 2, desc.m_buffer);
+                 desc.maybe_free ();
+               }
+             path->delete_event (idx + 2);
+             path->delete_event (idx + 1);
+             path->delete_event (idx);
+             changed = true;
+             idx--;
+             continue;
+           }
+
+         /* Prune [..., call, return, ...] pairs
+            (for -fanalyzer-verbosity=0).  */
+         if (idx + 1 < (signed)path->m_events.length ()
+             && path->m_events[idx]->is_call_p ()
+             && path->m_events[idx + 1]->is_return_p ())
+           {
+             if (get_logger ())
+               {
+                 label_text desc (path->m_events[idx]->get_desc (false));
+                 log ("filtering events %i-%i:"
+                      " irrelevant call/return: %s",
+                      idx, idx + 1, desc.m_buffer);
+                 desc.maybe_free ();
+               }
+             path->delete_event (idx + 1);
+             path->delete_event (idx);
+             changed = true;
+             idx--;
+             continue;
+           }
+
+         idx--;
+       }
+
+    }
+  while (changed);
+}
+
+/* Final pass of diagnostic_manager::prune_path.
+
+   If all we're left with is in one function, then filter function entry
+   events.  */
+
+void
+diagnostic_manager::finish_pruning (checker_path *path) const
+{
+  if (!path->interprocedural_p ())
+    {
+      int idx = path->m_events.length () - 1;
+      while (idx >= 0 && idx < (signed)path->m_events.length ())
+       {
+         checker_event *base_event = path->m_events[idx];
+         if (base_event->m_kind == EK_FUNCTION_ENTRY)
+           {
+             log ("filtering event %i:"
+                  " function entry for purely intraprocedural path", idx);
+             path->delete_event (idx);
+           }
+         idx--;
+       }
+    }
+}
+
+#endif /* #if ENABLE_ANALYZER */
diff --git a/gcc/analyzer/diagnostic-manager.h 
b/gcc/analyzer/diagnostic-manager.h
new file mode 100644
index 000000000000..92623234cdce
--- /dev/null
+++ b/gcc/analyzer/diagnostic-manager.h
@@ -0,0 +1,137 @@
+/* Classes for saving, deduplicating, and emitting analyzer diagnostics.
+   Copyright (C) 2019-2020 Free Software Foundation, Inc.
+   Contributed by David Malcolm <dmalc...@redhat.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_ANALYZER_DIAGNOSTIC_MANAGER_H
+#define GCC_ANALYZER_DIAGNOSTIC_MANAGER_H
+
+#include "analyzer/pending-diagnostic.h"
+
+/* A to-be-emitted diagnostic stored within diagnostic_manager.  */
+
+class saved_diagnostic
+{
+public:
+  saved_diagnostic (const state_machine *sm,
+                   const exploded_node *enode,
+                   const supernode *snode, const gimple *stmt,
+                   stmt_finder *stmt_finder,
+                   tree var, state_machine::state_t state,
+                   pending_diagnostic *d);
+  ~saved_diagnostic ();
+
+  bool operator== (const saved_diagnostic &other) const
+  {
+    return (m_sm == other.m_sm
+           /* We don't compare m_enode.  */
+           && m_snode == other.m_snode
+           && m_stmt == other.m_stmt
+           /* We don't compare m_stmt_finder.  */
+           && m_var == other.m_var
+           && m_state == other.m_state
+           && m_d->equal_p (*other.m_d)
+           && m_trailing_eedge == other.m_trailing_eedge);
+  }
+
+  //private:
+  const state_machine *m_sm;
+  const exploded_node *m_enode;
+  const supernode *m_snode;
+  const gimple *m_stmt;
+  stmt_finder *m_stmt_finder;
+  tree m_var;
+  state_machine::state_t m_state;
+  pending_diagnostic *m_d;
+  exploded_edge *m_trailing_eedge;
+
+private:
+  DISABLE_COPY_AND_ASSIGN (saved_diagnostic);
+};
+
+/* A class with responsibility for saving pending diagnostics, so that
+   they can be emitted after the exploded_graph is complete.
+   This lets us de-duplicate diagnostics, and find the shortest path
+   for each similar diagnostic, potentially using edges that might
+   not have been found when each diagnostic was first saved.
+
+   This also lets us compute shortest_paths once, rather than
+   per-diagnostic.  */
+
+class diagnostic_manager : public log_user
+{
+public:
+  diagnostic_manager (logger *logger, int verbosity);
+
+  void add_diagnostic (const state_machine *sm,
+                      const exploded_node *enode,
+                      const supernode *snode, const gimple *stmt,
+                      stmt_finder *finder,
+                      tree var, state_machine::state_t state,
+                      pending_diagnostic *d);
+
+  void add_diagnostic (const exploded_node *enode,
+                      const supernode *snode, const gimple *stmt,
+                      stmt_finder *finder,
+                      pending_diagnostic *d);
+
+  void emit_saved_diagnostics (const exploded_graph &eg);
+
+  void emit_saved_diagnostic (const exploded_graph &eg,
+                             const saved_diagnostic &sd,
+                             const exploded_path &epath,
+                             const gimple *stmt,
+                             int num_dupes);
+
+  unsigned get_num_diagnostics () const
+  {
+    return m_saved_diagnostics.length ();
+  }
+  saved_diagnostic *get_saved_diagnostic (unsigned idx)
+  {
+    return m_saved_diagnostics[idx];
+  }
+
+private:
+  void build_emission_path (const exploded_graph &eg,
+                           const exploded_path &epath,
+                           checker_path *emission_path) const;
+
+  void add_events_for_eedge (const exploded_edge &eedge,
+                            const extrinsic_state &ext_state,
+                            checker_path *emission_path) const;
+
+  void add_events_for_superedge (const exploded_edge &eedge,
+                                checker_path *emission_path) const;
+
+  void prune_path (checker_path *path,
+                  const state_machine *sm,
+                  tree var, state_machine::state_t state) const;
+
+  void prune_for_sm_diagnostic (checker_path *path,
+                               const state_machine *sm,
+                               tree var,
+                               state_machine::state_t state) const;
+  void prune_interproc_events (checker_path *path) const;
+  void finish_pruning (checker_path *path) const;
+
+  auto_delete_vec<saved_diagnostic> m_saved_diagnostics;
+  const int m_verbosity;
+};
+
+#endif /* GCC_ANALYZER_DIAGNOSTIC_MANAGER_H */
-- 
2.21.0

Reply via email to