Hi Bernd,

This patch allows to compile binaries with offloading without passing -flto 
option, and
w/o performing link-time optimizations of the host code.

How it works:
1.  If there is at least one function or global variable to offload, gcc sets 
flag_generate_lto.
This enables writing the bytecode produced by ipa_write_summaries into
.gnu.target_lto_* sections (.gnu.lto_* sections are not created).
Also this flag emits LTO marker (__gnu_lto_v1).
2.  This step is not changed: collect2 scans object files for the LTO marker 
and fills the list
of LTO objects.  If the list is not empty, it runs lto-wrapper to perform 
link-time recompilation.
3.  lto-wrapper compiles images for targets.  And if -flto option is absent
(lto_mode == LTO_MODE_NONE), then it just returns the list of input objects 
without recompilation.

One known issue -- the final binary contains temporary .gnu.target_lto_* 
sections.
This can be solved by adding the following linker script to the list of input 
files:
SECTIONS { /DISCARD/ : { *(.gnu.target_lto_*) } }
But I'm sure what is the best way to this automatically.

Bootstrap and make check passed, tests with '#pragma omp target' without -flto 
passed.
What do you think?

Thanks,
  -- Ilya


---
 gcc/cgraphunit.c  | 39 +++++++++++++++++++++++--------
 gcc/lto-wrapper.c | 68 +++++++++++++++++++++++++++++--------------------------
 gcc/omp-low.c     |  6 +++++
 gcc/passes.c      |  2 +-
 4 files changed, 73 insertions(+), 42 deletions(-)

diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index f0c9f5c..32b35f3 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -2040,13 +2040,26 @@ output_in_order (void)
   free (nodes);
 }
 
-/* Collect all global variables with "omp declare target" attribute into
-   OFFLOAD_VARS.  It will be streamed out in ipa_write_summaries.  */
+/* Check whether there is at least one function or global variable to offload.
+   Also collect all such global variables into OFFLOAD_VARS, the functions were
+   already collected in omp-low.c.  They will be streamed out in
+   ipa_write_summaries.  */
 
-static void
-init_offload_var_table (void)
+static bool
+initialize_offload (void)
 {
+  bool have_offload = false;
+  struct cgraph_node *node;
   struct varpool_node *vnode;
+
+  FOR_EACH_DEFINED_FUNCTION (node)
+    if (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (node->decl)))
+      {
+       have_offload = true;
+       break;
+      }
+
   FOR_EACH_DEFINED_VARIABLE (vnode)
     {
       if (!lookup_attribute ("omp declare target",
@@ -2054,13 +2067,17 @@ init_offload_var_table (void)
          || TREE_CODE (vnode->decl) != VAR_DECL
          || DECL_SIZE (vnode->decl) == 0)
        continue;
+      have_offload = true;
       vec_safe_push (offload_vars, vnode->decl);
     }
+
+  return have_offload;
 }
 
 static void
 ipa_passes (void)
 {
+  bool have_offload = false;
   gcc::pass_manager *passes = g->get_passes ();
 
   set_cfun (NULL);
@@ -2068,6 +2085,14 @@ ipa_passes (void)
   gimple_register_cfg_hooks ();
   bitmap_obstack_initialize (NULL);
 
+  if (!in_lto_p && (flag_openacc || flag_openmp))
+    {
+      have_offload = initialize_offload ();
+      /* OpenACC / OpenMP offloading requires LTO infrastructure.  */
+      if (have_offload)
+       flag_generate_lto = 1;
+    }
+
   invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_START, NULL);
 
   if (!in_lto_p)
@@ -2108,11 +2133,7 @@ ipa_passes (void)
 
   if (!in_lto_p)
     {
-      init_offload_var_table ();
-
-      if ((flag_openacc || flag_openmp)
-         && !(vec_safe_is_empty (offload_funcs)
-              && vec_safe_is_empty (offload_vars)))
+      if (have_offload)
        {
          section_name_prefix = OMP_SECTION_NAME_PREFIX;
          ipa_write_summaries (true);
diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c
index 80d10f3..e9245f1 100644
--- a/gcc/lto-wrapper.c
+++ b/gcc/lto-wrapper.c
@@ -668,6 +668,11 @@ run_gcc (unsigned argc, char *argv[])
          close (fd);
          continue;
        }
+      /* We may choose not to write out this .opts section in the future.  In
+        that case we'll have to use something else to look for.  */
+      if (simple_object_find_section (sobj, OMP_SECTION_NAME_PREFIX "." "opts",
+                                     &offset, &length, &errmsg, &err))
+       have_offload = true;
       if (!simple_object_find_section (sobj, LTO_SECTION_NAME_PREFIX "." 
"opts",
                                       &offset, &length, &errmsg, &err))
        {
@@ -675,11 +680,6 @@ run_gcc (unsigned argc, char *argv[])
          close (fd);
          continue;
        }
-      /* We may choose not to write out this .opts section in the future.  In
-        that case we'll have to use something else to look for.  */
-      if (simple_object_find_section (sobj, OMP_SECTION_NAME_PREFIX "." "opts",
-                                     &offset, &length, &errmsg, &err))
-       have_offload = true;
       lseek (fd, file_offset + offset, SEEK_SET);
       data = (char *)xmalloc (length);
       read (fd, data, length);
@@ -871,7 +871,31 @@ run_gcc (unsigned argc, char *argv[])
   /* Remember at which point we can scrub args to re-use the commons.  */
   new_head_argc = obstack_object_size (&argv_obstack) / sizeof (void *);
 
-  if (lto_mode == LTO_MODE_LTO)
+  if (have_offload)
+    {
+      compile_images_for_openmp_targets (argc, argv);
+      if (offload_names)
+       {
+         find_ompbeginend ();
+         for (i = 0; offload_names[i]; i++)
+           printf ("%s\n", offload_names[i]);
+         free_array_of_ptrs ((void **) offload_names, i);
+       }
+    }
+
+  if (ompbegin)
+    printf ("%s\n", ompbegin);
+
+  if (lto_mode == LTO_MODE_NONE)
+    {
+      /* If we are in lto-wrapper, but -flto option is absent, it means that
+        there is no need to perform a link-time recompilation, i.e. lto-wrapper
+        is used only for compiling offload images.  */
+      for (i = 1; i < argc; ++i)
+       printf ("%s\n", argv[i]);
+      goto finish;
+    }
+  else if (lto_mode == LTO_MODE_LTO)
     {
       flto_out = make_temp_file (".lto.o");
       if (linker_output)
@@ -879,7 +903,7 @@ run_gcc (unsigned argc, char *argv[])
       obstack_ptr_grow (&argv_obstack, "-o");
       obstack_ptr_grow (&argv_obstack, flto_out);
     }
-  else 
+  else if (lto_mode == LTO_MODE_WHOPR)
     {
       const char *list_option = "-fltrans-output-list=";
       size_t list_option_len = strlen (list_option);
@@ -939,7 +963,7 @@ run_gcc (unsigned argc, char *argv[])
       free (flto_out);
       flto_out = NULL;
     }
-  else
+  else if (lto_mode == LTO_MODE_WHOPR)
     {
       FILE *stream = fopen (ltrans_output_file, "r");
       FILE *mstream = NULL;
@@ -1084,25 +1108,6 @@ cont:
          for (i = 0; i < nr; ++i)
            maybe_unlink (input_names[i]);
        }
-      if (have_offload)
-       {
-         compile_images_for_openmp_targets (argc, argv);
-         if (offload_names)
-           {
-             find_ompbeginend ();
-             for (i = 0; offload_names[i]; i++)
-               {
-                 fputs (offload_names[i], stdout);
-                 putc ('\n', stdout);
-               }
-             free_array_of_ptrs ((void **)offload_names, i);
-           }
-       }
-      if (ompbegin)
-       {
-         fputs (ompbegin, stdout);
-         putc ('\n', stdout);
-       }
 
       for (i = 0; i < nr; ++i)
        {
@@ -1110,11 +1115,6 @@ cont:
          putc ('\n', stdout);
          free (input_names[i]);
        }
-      if (ompend)
-       {
-         fputs (ompend, stdout);
-         putc ('\n', stdout);
-       }
       nr = 0;
       free (output_names);
       free (input_names);
@@ -1122,6 +1122,10 @@ cont:
       obstack_free (&env_obstack, NULL);
     }
 
+finish:
+  if (ompend)
+    printf ("%s\n", ompend);
+
   obstack_free (&argv_obstack, NULL);
 }
 
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 1ad98ab..9289031 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -13771,6 +13771,12 @@ omp_finish_file (void)
                                   get_identifier (".omp_var_table"),
                                   vars_decl_type);
       TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
+      /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
+        otherwise a joint table in a binary will contain padding between
+        tables from multiple object files.  */
+      DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
+      DECL_ALIGN (funcs_decl) = TYPE_ALIGN (funcs_decl_type);
+      DECL_ALIGN (vars_decl) = TYPE_ALIGN (vars_decl_type);
       DECL_INITIAL (funcs_decl) = ctor_f;
       DECL_INITIAL (vars_decl) = ctor_v;
       set_decl_section_name (funcs_decl, funcs_section_name);
diff --git a/gcc/passes.c b/gcc/passes.c
index 8172185..e776059 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -2303,7 +2303,7 @@ ipa_write_summaries (bool is_omp)
   struct cgraph_node *node;
   struct cgraph_node **order;
 
-  if (!(flag_generate_lto || flag_openacc || flag_openmp) || seen_error () )
+  if (!flag_generate_lto || seen_error ())
     return;
 
   select_what_to_dump (is_omp);
-- 
1.8.3.1

Reply via email to