On Wed, 2023-08-02 at 12:20 -0400, Eric Feng wrote:

Hi Eric, thanks for the updated patch.

Overall, looks good to me, although I'd drop the "Exited." from the
"sorry" message (and thus from the dg-message directive), since the
compiler is not exiting, it's just the particular plugin that's giving
up (but let's not hold up the patch with a "bikeshed" discussion on the
precise wording).

If Joseph or Marek approves the C parts of the patch, this will be OK
to push to trunk.

Dave

> Revised:
> -- Fix indentation problems
> -- Add more detail to Changelog
> -- Add new test on handling non-CPython code case
> -- Turn off debugging inform by default
> -- Make on_finish_translation_unit() static
> -- Remove superfluous null checks in init_py_structs()
> 
> Changes have been bootstrapped and tested against trunk on aarch64-
> unknown-linux-gnu.
> 
> ---
> This patch adds a hook to the end of ana::on_finish_translation_unit
> which calls relevant stashing-related callbacks registered during
> plugin
> initialization. This feature is used to stash named types and global
> variables for a CPython analyzer plugin [PR107646].
> 
> gcc/analyzer/ChangeLog:
>         PR analyzer/107646
>         * analyzer-language.cc (run_callbacks): New function.
>         (on_finish_translation_unit): New function.
>         * analyzer-language.h (GCC_ANALYZER_LANGUAGE_H): New include.
>         (class translation_unit): New vfuncs.
> 
> gcc/c/ChangeLog:
>         PR analyzer/107646
>         * c-parser.cc: New functions on stashing values for the
>           analyzer.
> 
> gcc/testsuite/ChangeLog:
>         PR analyzer/107646
>         * gcc.dg/plugin/plugin.exp: Add new plugin and test.
>         * gcc.dg/plugin/analyzer_cpython_plugin.c: New plugin.
>         * gcc.dg/plugin/cpython-plugin-test-1.c: New test.
> 
> Signed-off-by: Eric Feng <ef2...@columbia.edu>
> ---
>  gcc/analyzer/analyzer-language.cc             |  22 ++
>  gcc/analyzer/analyzer-language.h              |   9 +
>  gcc/c/c-parser.cc                             |  26 ++
>  .../gcc.dg/plugin/analyzer_cpython_plugin.c   | 230
> ++++++++++++++++++
>  .../gcc.dg/plugin/cpython-plugin-test-1.c     |   8 +
>  gcc/testsuite/gcc.dg/plugin/plugin.exp        |   2 +
>  6 files changed, 297 insertions(+)
>  create mode 100644
> gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
>  create mode 100644 gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-
> 1.c
> 
> diff --git a/gcc/analyzer/analyzer-language.cc
> b/gcc/analyzer/analyzer-language.cc
> index 2c8910906ee..85400288a93 100644
> --- a/gcc/analyzer/analyzer-language.cc
> +++ b/gcc/analyzer/analyzer-language.cc
> @@ -35,6 +35,26 @@ static GTY (()) hash_map <tree, tree>
> *analyzer_stashed_constants;
>  #if ENABLE_ANALYZER
>  
>  namespace ana {
> +static vec<finish_translation_unit_callback>
> +    *finish_translation_unit_callbacks;
> +
> +void
> +register_finish_translation_unit_callback (
> +    finish_translation_unit_callback callback)
> +{
> +  if (!finish_translation_unit_callbacks)
> +    vec_alloc (finish_translation_unit_callbacks, 1);
> +  finish_translation_unit_callbacks->safe_push (callback);
> +}
> +
> +static void
> +run_callbacks (logger *logger, const translation_unit &tu)
> +{
> +  for (auto const &cb : finish_translation_unit_callbacks)
> +    {
> +      cb (logger, tu);
> +    }
> +}
>  
>  /* Call into TU to try to find a value for NAME.
>     If found, stash its value within analyzer_stashed_constants.  */
> @@ -102,6 +122,8 @@ on_finish_translation_unit (const
> translation_unit &tu)
>      the_logger.set_logger (new logger (logfile, 0, 0,
>                                        *global_dc->printer));
>    stash_named_constants (the_logger.get_logger (), tu);
> +
> +  run_callbacks (the_logger.get_logger (), tu);
>  }
>  
>  /* Lookup NAME in the named constants stashed when the frontend TU
> finished.
> diff --git a/gcc/analyzer/analyzer-language.h
> b/gcc/analyzer/analyzer-language.h
> index 00f85aba041..8deea52d627 100644
> --- a/gcc/analyzer/analyzer-language.h
> +++ b/gcc/analyzer/analyzer-language.h
> @@ -21,6 +21,8 @@ along with GCC; see the file COPYING3.  If not see
>  #ifndef GCC_ANALYZER_LANGUAGE_H
>  #define GCC_ANALYZER_LANGUAGE_H
>  
> +#include "analyzer/analyzer-logging.h"
> +
>  #if ENABLE_ANALYZER
>  
>  namespace ana {
> @@ -35,8 +37,15 @@ class translation_unit
>       have been seen).  If it is defined and an integer (e.g. either
> as a
>       macro or enum), return the INTEGER_CST value, otherwise return
> NULL.  */
>    virtual tree lookup_constant_by_id (tree id) const = 0;
> +  virtual tree lookup_type_by_id (tree id) const = 0;
> +  virtual tree lookup_global_var_by_id (tree id) const = 0;
>  };
>  
> +typedef void (*finish_translation_unit_callback)
> +   (logger *, const translation_unit &);
> +void register_finish_translation_unit_callback (
> +    finish_translation_unit_callback callback);
> +
>  /* Analyzer hook for frontends to call at the end of the TU.  */
>  
>  void on_finish_translation_unit (const translation_unit &tu);
> diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
> index cf82b0306d1..617111b0f0a 100644
> --- a/gcc/c/c-parser.cc
> +++ b/gcc/c/c-parser.cc
> @@ -1695,6 +1695,32 @@ public:
>      return NULL_TREE;
>    }
>  
> +  tree
> +  lookup_type_by_id (tree id) const final override
> +  {
> +    if (tree type_decl = lookup_name (id))
> +      {
> +       if (TREE_CODE (type_decl) == TYPE_DECL)
> +         {
> +           tree record_type = TREE_TYPE (type_decl);
> +           if (TREE_CODE (record_type) == RECORD_TYPE)
> +             return record_type;
> +         }
> +      }
> +
> +    return NULL_TREE;
> +  }
> +
> +  tree
> +  lookup_global_var_by_id (tree id) const final override
> +  {
> +    if (tree var_decl = lookup_name (id))
> +      if (TREE_CODE (var_decl) == VAR_DECL)
> +       return var_decl;
> +
> +    return NULL_TREE;
> +  }
> +
>  private:
>    /* Attempt to get an INTEGER_CST from MACRO.
>       Only handle the simplest cases: where MACRO's definition is a
> single
> diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> new file mode 100644
> index 00000000000..72375cfd879
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> @@ -0,0 +1,230 @@
> +/* -fanalyzer plugin for CPython extension modules  */
> +/* { dg-options "-g" } */
> +
> +#define INCLUDE_MEMORY
> +#include "gcc-plugin.h"
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "tree.h"
> +#include "function.h"
> +#include "basic-block.h"
> +#include "gimple.h"
> +#include "gimple-iterator.h"
> +#include "diagnostic-core.h"
> +#include "graphviz.h"
> +#include "options.h"
> +#include "cgraph.h"
> +#include "tree-dfa.h"
> +#include "stringpool.h"
> +#include "convert.h"
> +#include "target.h"
> +#include "fold-const.h"
> +#include "tree-pretty-print.h"
> +#include "diagnostic-color.h"
> +#include "diagnostic-metadata.h"
> +#include "tristate.h"
> +#include "bitmap.h"
> +#include "selftest.h"
> +#include "function.h"
> +#include "json.h"
> +#include "analyzer/analyzer.h"
> +#include "analyzer/analyzer-language.h"
> +#include "analyzer/analyzer-logging.h"
> +#include "ordered-hash-map.h"
> +#include "options.h"
> +#include "cgraph.h"
> +#include "cfg.h"
> +#include "digraph.h"
> +#include "analyzer/supergraph.h"
> +#include "sbitmap.h"
> +#include "analyzer/call-string.h"
> +#include "analyzer/program-point.h"
> +#include "analyzer/store.h"
> +#include "analyzer/region-model.h"
> +#include "analyzer/call-details.h"
> +#include "analyzer/call-info.h"
> +#include "make-unique.h"
> +
> +int plugin_is_GPL_compatible;
> +
> +#if ENABLE_ANALYZER
> +static GTY (()) hash_map<tree, tree> *analyzer_stashed_types;
> +static GTY (()) hash_map<tree, tree> *analyzer_stashed_globals;
> +
> +namespace ana
> +{
> +static tree pyobj_record = NULL_TREE;
> +static tree varobj_record = NULL_TREE;
> +static tree pylistobj_record = NULL_TREE;
> +static tree pylongobj_record = NULL_TREE;
> +static tree pylongtype_vardecl = NULL_TREE;
> +static tree pylisttype_vardecl = NULL_TREE;
> +
> +static tree
> +get_field_by_name (tree type, const char *name)
> +{
> +  for (tree field = TYPE_FIELDS (type); field; field = TREE_CHAIN
> (field))
> +    {
> +      if (TREE_CODE (field) == FIELD_DECL)
> +        {
> +          const char *field_name = IDENTIFIER_POINTER (DECL_NAME
> (field));
> +          if (strcmp (field_name, name) == 0)
> +            return field;
> +        }
> +    }
> +  return NULL_TREE;
> +}
> +
> +static void
> +maybe_stash_named_type (logger *logger, const translation_unit &tu,
> +                        const char *name)
> +{
> +  LOG_FUNC_1 (logger, "name: %qs", name);
> +  if (!analyzer_stashed_types)
> +    analyzer_stashed_types = hash_map<tree, tree>::create_ggc ();
> +
> +  tree id = get_identifier (name);
> +  if (tree t = tu.lookup_type_by_id (id))
> +    {
> +      gcc_assert (TREE_CODE (t) == RECORD_TYPE);
> +      analyzer_stashed_types->put (id, t);
> +      if (logger)
> +        logger->log ("found %qs: %qE", name, t);
> +    }
> +  else
> +    {
> +      if (logger)
> +        logger->log ("%qs: not found", name);
> +    }
> +}
> +
> +static void
> +maybe_stash_global_var (logger *logger, const translation_unit &tu,
> +                        const char *name)
> +{
> +  LOG_FUNC_1 (logger, "name: %qs", name);
> +  if (!analyzer_stashed_globals)
> +    analyzer_stashed_globals = hash_map<tree, tree>::create_ggc ();
> +
> +  tree id = get_identifier (name);
> +  if (tree t = tu.lookup_global_var_by_id (id))
> +    {
> +      gcc_assert (TREE_CODE (t) == VAR_DECL);
> +      analyzer_stashed_globals->put (id, t);
> +      if (logger)
> +        logger->log ("found %qs: %qE", name, t);
> +    }
> +  else
> +    {
> +      if (logger)
> +        logger->log ("%qs: not found", name);
> +    }
> +}
> +
> +static void
> +stash_named_types (logger *logger, const translation_unit &tu)
> +{
> +  LOG_SCOPE (logger);
> +
> +  maybe_stash_named_type (logger, tu, "PyObject");
> +  maybe_stash_named_type (logger, tu, "PyListObject");
> +  maybe_stash_named_type (logger, tu, "PyVarObject");
> +  maybe_stash_named_type (logger, tu, "PyLongObject");
> +}
> +
> +static void
> +stash_global_vars (logger *logger, const translation_unit &tu)
> +{
> +  LOG_SCOPE (logger);
> +
> +  maybe_stash_global_var (logger, tu, "PyLong_Type");
> +  maybe_stash_global_var (logger, tu, "PyList_Type");
> +}
> +
> +static tree
> +get_stashed_type_by_name (const char *name)
> +{
> +  if (!analyzer_stashed_types)
> +    return NULL_TREE;
> +  tree id = get_identifier (name);
> +  if (tree *slot = analyzer_stashed_types->get (id))
> +    {
> +      gcc_assert (TREE_CODE (*slot) == RECORD_TYPE);
> +      return *slot;
> +    }
> +  return NULL_TREE;
> +}
> +
> +static tree
> +get_stashed_global_var_by_name (const char *name)
> +{
> +  if (!analyzer_stashed_globals)
> +    return NULL_TREE;
> +  tree id = get_identifier (name);
> +  if (tree *slot = analyzer_stashed_globals->get (id))
> +    {
> +      gcc_assert (TREE_CODE (*slot) == VAR_DECL);
> +      return *slot;
> +    }
> +  return NULL_TREE;
> +}
> +
> +static void
> +init_py_structs ()
> +{
> +  pyobj_record = get_stashed_type_by_name ("PyObject");
> +  varobj_record = get_stashed_type_by_name ("PyVarObject");
> +  pylistobj_record = get_stashed_type_by_name ("PyListObject");
> +  pylongobj_record = get_stashed_type_by_name ("PyLongObject");
> +  pylongtype_vardecl = get_stashed_global_var_by_name
> ("PyLong_Type");
> +  pylisttype_vardecl = get_stashed_global_var_by_name
> ("PyList_Type");
> +}
> +
> +void
> +sorry_no_cpython_plugin ()
> +{
> +  sorry ("%qs definitions not found."
> +        " Please ensure to %qs. Exiting.)",
> +        "Python/C API", "#include <Python.h>");
> +}
> +
> +static void
> +cpython_analyzer_init_cb (void *gcc_data, void * /*user_data */)
> +{
> +  ana::plugin_analyzer_init_iface *iface
> +      = (ana::plugin_analyzer_init_iface *)gcc_data;
> +  LOG_SCOPE (iface->get_logger ());
> +  if (0)
> +    inform (input_location, "got here: cpython_analyzer_init_cb");
> +
> +  init_py_structs ();
> +
> +  if (pyobj_record == NULL_TREE)
> +    {
> +      sorry_no_cpython_plugin ();
> +      return;
> +    }
> +}
> +} // namespace ana
> +
> +#endif /* #if ENABLE_ANALYZER */
> +
> +int
> +plugin_init (struct plugin_name_args *plugin_info,
> +             struct plugin_gcc_version *version)
> +{
> +#if ENABLE_ANALYZER
> +  const char *plugin_name = plugin_info->base_name;
> +  if (0)
> +    inform (input_location, "got here; %qs", plugin_name);
> +  ana::register_finish_translation_unit_callback
> (&stash_named_types);
> +  ana::register_finish_translation_unit_callback
> (&stash_global_vars);
> +  register_callback (plugin_info->base_name, PLUGIN_ANALYZER_INIT,
> +                     ana::cpython_analyzer_init_cb,
> +                     NULL); /* void *user_data */
> +#else
> +  sorry_no_analyzer ();
> +#endif
> +  return 0;
> +}
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-1.c
> b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-1.c
> new file mode 100644
> index 00000000000..30f17355711
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-1.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-options "-fanalyzer" } */
> +/* { dg-require-effective-target analyzer } */
> +/* { dg-message "'Python/C API' definitions not found. Please ensure
> to '#include <Python.h>'. Exiting." "" { target *-*-* } 0 } */
> +
> +void test_no_python_plugin ()
> +{
> +}
> \ No newline at end of file
> diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp
> b/gcc/testsuite/gcc.dg/plugin/plugin.exp
> index 60723a20eda..09c45394b1f 100644
> --- a/gcc/testsuite/gcc.dg/plugin/plugin.exp
> +++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp
> @@ -160,6 +160,8 @@ set plugin_test_list [list \
>           taint-CVE-2011-0521-5-fixed.c \
>           taint-CVE-2011-0521-6.c \
>           taint-antipatterns-1.c } \
> +    { analyzer_cpython_plugin.c \
> +         cpython-plugin-test-1.c } \
>  ]
>  
>  foreach plugin_test $plugin_test_list {

Reply via email to