Hi Bernd, This patch allows to compile binaries with offloading without passing -flto option, and w/o performing link-time optimizations of the host code.
How it works: 1. If there is at least one function or global variable to offload, gcc sets flag_generate_lto. This enables writing the bytecode produced by ipa_write_summaries into .gnu.target_lto_* sections (.gnu.lto_* sections are not created). Also this flag emits LTO marker (__gnu_lto_v1). 2. This step is not changed: collect2 scans object files for the LTO marker and fills the list of LTO objects. If the list is not empty, it runs lto-wrapper to perform link-time recompilation. 3. lto-wrapper compiles images for targets. And if -flto option is absent (lto_mode == LTO_MODE_NONE), then it just returns the list of input objects without recompilation. One known issue -- the final binary contains temporary .gnu.target_lto_* sections. This can be solved by adding the following linker script to the list of input files: SECTIONS { /DISCARD/ : { *(.gnu.target_lto_*) } } But I'm sure what is the best way to this automatically. Bootstrap and make check passed, tests with '#pragma omp target' without -flto passed. What do you think? Thanks, -- Ilya --- gcc/cgraphunit.c | 39 +++++++++++++++++++++++-------- gcc/lto-wrapper.c | 68 +++++++++++++++++++++++++++++-------------------------- gcc/omp-low.c | 6 +++++ gcc/passes.c | 2 +- 4 files changed, 73 insertions(+), 42 deletions(-) diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index f0c9f5c..32b35f3 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -2040,13 +2040,26 @@ output_in_order (void) free (nodes); } -/* Collect all global variables with "omp declare target" attribute into - OFFLOAD_VARS. It will be streamed out in ipa_write_summaries. */ +/* Check whether there is at least one function or global variable to offload. + Also collect all such global variables into OFFLOAD_VARS, the functions were + already collected in omp-low.c. They will be streamed out in + ipa_write_summaries. */ -static void -init_offload_var_table (void) +static bool +initialize_offload (void) { + bool have_offload = false; + struct cgraph_node *node; struct varpool_node *vnode; + + FOR_EACH_DEFINED_FUNCTION (node) + if (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (node->decl))) + { + have_offload = true; + break; + } + FOR_EACH_DEFINED_VARIABLE (vnode) { if (!lookup_attribute ("omp declare target", @@ -2054,13 +2067,17 @@ init_offload_var_table (void) || TREE_CODE (vnode->decl) != VAR_DECL || DECL_SIZE (vnode->decl) == 0) continue; + have_offload = true; vec_safe_push (offload_vars, vnode->decl); } + + return have_offload; } static void ipa_passes (void) { + bool have_offload = false; gcc::pass_manager *passes = g->get_passes (); set_cfun (NULL); @@ -2068,6 +2085,14 @@ ipa_passes (void) gimple_register_cfg_hooks (); bitmap_obstack_initialize (NULL); + if (!in_lto_p && (flag_openacc || flag_openmp)) + { + have_offload = initialize_offload (); + /* OpenACC / OpenMP offloading requires LTO infrastructure. */ + if (have_offload) + flag_generate_lto = 1; + } + invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_START, NULL); if (!in_lto_p) @@ -2108,11 +2133,7 @@ ipa_passes (void) if (!in_lto_p) { - init_offload_var_table (); - - if ((flag_openacc || flag_openmp) - && !(vec_safe_is_empty (offload_funcs) - && vec_safe_is_empty (offload_vars))) + if (have_offload) { section_name_prefix = OMP_SECTION_NAME_PREFIX; ipa_write_summaries (true); diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c index 80d10f3..e9245f1 100644 --- a/gcc/lto-wrapper.c +++ b/gcc/lto-wrapper.c @@ -668,6 +668,11 @@ run_gcc (unsigned argc, char *argv[]) close (fd); continue; } + /* We may choose not to write out this .opts section in the future. In + that case we'll have to use something else to look for. */ + if (simple_object_find_section (sobj, OMP_SECTION_NAME_PREFIX "." "opts", + &offset, &length, &errmsg, &err)) + have_offload = true; if (!simple_object_find_section (sobj, LTO_SECTION_NAME_PREFIX "." "opts", &offset, &length, &errmsg, &err)) { @@ -675,11 +680,6 @@ run_gcc (unsigned argc, char *argv[]) close (fd); continue; } - /* We may choose not to write out this .opts section in the future. In - that case we'll have to use something else to look for. */ - if (simple_object_find_section (sobj, OMP_SECTION_NAME_PREFIX "." "opts", - &offset, &length, &errmsg, &err)) - have_offload = true; lseek (fd, file_offset + offset, SEEK_SET); data = (char *)xmalloc (length); read (fd, data, length); @@ -871,7 +871,31 @@ run_gcc (unsigned argc, char *argv[]) /* Remember at which point we can scrub args to re-use the commons. */ new_head_argc = obstack_object_size (&argv_obstack) / sizeof (void *); - if (lto_mode == LTO_MODE_LTO) + if (have_offload) + { + compile_images_for_openmp_targets (argc, argv); + if (offload_names) + { + find_ompbeginend (); + for (i = 0; offload_names[i]; i++) + printf ("%s\n", offload_names[i]); + free_array_of_ptrs ((void **) offload_names, i); + } + } + + if (ompbegin) + printf ("%s\n", ompbegin); + + if (lto_mode == LTO_MODE_NONE) + { + /* If we are in lto-wrapper, but -flto option is absent, it means that + there is no need to perform a link-time recompilation, i.e. lto-wrapper + is used only for compiling offload images. */ + for (i = 1; i < argc; ++i) + printf ("%s\n", argv[i]); + goto finish; + } + else if (lto_mode == LTO_MODE_LTO) { flto_out = make_temp_file (".lto.o"); if (linker_output) @@ -879,7 +903,7 @@ run_gcc (unsigned argc, char *argv[]) obstack_ptr_grow (&argv_obstack, "-o"); obstack_ptr_grow (&argv_obstack, flto_out); } - else + else if (lto_mode == LTO_MODE_WHOPR) { const char *list_option = "-fltrans-output-list="; size_t list_option_len = strlen (list_option); @@ -939,7 +963,7 @@ run_gcc (unsigned argc, char *argv[]) free (flto_out); flto_out = NULL; } - else + else if (lto_mode == LTO_MODE_WHOPR) { FILE *stream = fopen (ltrans_output_file, "r"); FILE *mstream = NULL; @@ -1084,25 +1108,6 @@ cont: for (i = 0; i < nr; ++i) maybe_unlink (input_names[i]); } - if (have_offload) - { - compile_images_for_openmp_targets (argc, argv); - if (offload_names) - { - find_ompbeginend (); - for (i = 0; offload_names[i]; i++) - { - fputs (offload_names[i], stdout); - putc ('\n', stdout); - } - free_array_of_ptrs ((void **)offload_names, i); - } - } - if (ompbegin) - { - fputs (ompbegin, stdout); - putc ('\n', stdout); - } for (i = 0; i < nr; ++i) { @@ -1110,11 +1115,6 @@ cont: putc ('\n', stdout); free (input_names[i]); } - if (ompend) - { - fputs (ompend, stdout); - putc ('\n', stdout); - } nr = 0; free (output_names); free (input_names); @@ -1122,6 +1122,10 @@ cont: obstack_free (&env_obstack, NULL); } +finish: + if (ompend) + printf ("%s\n", ompend); + obstack_free (&argv_obstack, NULL); } diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 1ad98ab..9289031 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -13771,6 +13771,12 @@ omp_finish_file (void) get_identifier (".omp_var_table"), vars_decl_type); TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1; + /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node), + otherwise a joint table in a binary will contain padding between + tables from multiple object files. */ + DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1; + DECL_ALIGN (funcs_decl) = TYPE_ALIGN (funcs_decl_type); + DECL_ALIGN (vars_decl) = TYPE_ALIGN (vars_decl_type); DECL_INITIAL (funcs_decl) = ctor_f; DECL_INITIAL (vars_decl) = ctor_v; set_decl_section_name (funcs_decl, funcs_section_name); diff --git a/gcc/passes.c b/gcc/passes.c index 8172185..e776059 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -2303,7 +2303,7 @@ ipa_write_summaries (bool is_omp) struct cgraph_node *node; struct cgraph_node **order; - if (!(flag_generate_lto || flag_openacc || flag_openmp) || seen_error () ) + if (!flag_generate_lto || seen_error ()) return; select_what_to_dump (is_omp); -- 1.8.3.1