Hi Andrew! On 2020-07-16T16:06:49+0100, Andrew Stubbs <a...@codesourcery.com> wrote: > This patch adds debug support to mkoffload, similar to what happens in > lto-wrapper.
Ah, good, it's not as invasive/convoluted as I'd assumed from the verbal description you'd given. > Unlike lto-wrapper, we must deal with mismatched architectures and > mismatched program scope. These are fixed up using manual ELF patching > because there's no useful support in simple_object (yet). Should this be > something that other offload targets want to do then that probably ought > to be changed in future, but it isn't necessary yet. ACK. > This is enough to prevent rocgdb choking on malformed debug information > and debug simple offload testcases :-) > it may be that further adjustment is > needed. > > I have not attempted to make similar changes to the other instance of > mkoffload ACK. > as nvptx has no use for debug info. Specifically, nvptx only supports 'DWARF2_LINENO_DEBUGGING_INFO' (and that's mostly untested, too). > --- a/gcc/config/gcn/mkoffload.c > +++ b/gcc/config/gcn/mkoffload.c > @@ -33,31 +33,53 @@ > #include <libgen.h> > #include "collect-utils.h" > #include "gomp-constants.h" > +#include "simple-object.h" > +#include "elf.h" > + > +/* These probably won't be in elf.h for a while. */ > +#ifndef EM_AMDGPU Nope, it already is. ;-P > +#define EM_AMDGPU 0xe0; > + > +#define ELFOSABI_AMDGPU_HSA 64 > +#define ELFABIVERSION_AMDGPU_HSA 1 > + > +#define EF_AMDGPU_MACH_AMDGCN_GFX803 0x2a > +#define EF_AMDGPU_MACH_AMDGCN_GFX900 0x2c > +#define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f > + > +#define R_AMDGPU_NONE 0 > +#define R_AMDGPU_ABS32_LO 1 /* (S + A) & 0xFFFFFFFF */ > +#define R_AMDGPU_ABS32_HI 2 /* (S + A) >> 32 */ > +#define R_AMDGPU_ABS64 3 /* S + A */ > +#define R_AMDGPU_REL32 4 /* S + A - P */ > +#define R_AMDGPU_REL64 5 /* S + A - P */ > +#define R_AMDGPU_ABS32 6 /* S + A */ > +#define R_AMDGPU_GOTPCREL 7 /* G + GOT + A - P */ > +#define R_AMDGPU_GOTPCREL32_LO 8 /* (G + GOT + A - P) & > 0xFFFFFFFF */ > +#define R_AMDGPU_GOTPCREL32_HI 9 /* (G + GOT + A - P) >> 32 */ > +#define R_AMDGPU_REL32_LO 10 /* (S + A - P) & 0xFFFFFFFF */ > +#define R_AMDGPU_REL32_HI 11 /* (S + A - P) >> 32 */ > +#define reserved 12 (That's not a useful '#define reserved'?) > +#define R_AMDGPU_RELATIVE64 13 /* B + A */ > +#endif The standard Ubuntu 18.04 '/usr/include/elf.h' as shipped by package libc6-dev:amd64 in version 2.27-3ubuntu1 contains: $ grep -n AMDGPU < /usr/include/elf.h 358:#define EM_AMDGPU 224 /* AMD GPU */ ..., and thus: [...]/source-gcc/gcc/config/gcn/mkoffload.c:72:21: error: 'EF_AMDGPU_MACH_AMDGCN_GFX803' was not declared in this scope uint32_t elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803; // Default GPU architecture. ^ [...]/source-gcc/gcc/config/gcn/mkoffload.c: In function 'bool copy_early_debug_info(const char*, const char*)': [...]/source-gcc/gcc/config/gcn/mkoffload.c:290:21: error: 'ELFOSABI_AMDGPU_HSA' was not declared in this scope ehdr.e_ident[7] = ELFOSABI_AMDGPU_HSA; ^ [...]/source-gcc/gcc/config/gcn/mkoffload.c:291:21: error: 'ELFABIVERSION_AMDGPU_HSA' was not declared in this scope ehdr.e_ident[8] = ELFABIVERSION_AMDGPU_HSA; ^ [...]/source-gcc/gcc/config/gcn/mkoffload.c:334:24: error: 'R_AMDGPU_ABS32' was not declared in this scope reloc->r_info = R_AMDGPU_ABS32; ^ [...] I've got the canary 'EM_AMDGPU', but not the other '#define's. > -/* Files to unlink. */ > -static const char *gcn_s1_name; > -static const char *gcn_s2_name; > -static const char *gcn_o_name; > -static const char *gcn_cfile_name; > static const char *gcn_dumpbase; > +static struct obstack files_to_cleanup; (Good idea; should do similar in the other 'mkoffload's.) > +uint32_t elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803; // Default GPU > architecture. For easier later maintenance, shouldn't this be a '#define' (or similar) done next to where the GCC back end defines its default? Grüße Thomas > /* Delete tempfiles. */ > > void > tool_cleanup (bool from_signal ATTRIBUTE_UNUSED) > { > - if (gcn_cfile_name) > - maybe_unlink (gcn_cfile_name); > - if (gcn_s1_name) > - maybe_unlink (gcn_s1_name); > - if (gcn_s2_name) > - maybe_unlink (gcn_s2_name); > - if (gcn_o_name) > - maybe_unlink (gcn_o_name); > + obstack_ptr_grow (&files_to_cleanup, NULL); > + const char **files = XOBFINISH (&files_to_cleanup, const char **); > + for (int i = 0; files[i]; i++) > + maybe_unlink (files[i]); > } > > static void > @@ -204,6 +226,180 @@ access_check (const char *name, int mode) > return access (name, mode); > } > > +/* Copy the early-debug-info from the incoming LTO object to a new object > + that will be linked into the output HSACO file. The host relocations > + must be translated into GCN relocations, and any global undefined symbols > + must be weakened (so as not to have the debug info try to pull in host > + junk). > + > + Returns true if the file was created, false otherwise. */ > + > +static bool > +copy_early_debug_info (const char *infile, const char *outfile) > +{ > + const char *errmsg; > + int err; > + > + /* The simple_object code can handle extracting the debug sections. > + This code is based on that in lto-wrapper.c. */ > + int infd = open (infile, O_RDONLY | O_BINARY); > + if (infd == -1) > + return false; > + simple_object_read *inobj = simple_object_start_read (infd, 0, > + "__GNU_LTO", > + &errmsg, &err); > + if (!inobj) > + return false; > + > + off_t off, len; > + if (simple_object_find_section (inobj, ".gnu.debuglto_.debug_info", > + &off, &len, &errmsg, &err) != 1) > + { > + simple_object_release_read (inobj); > + close (infd); > + return false; > + } > + > + errmsg = simple_object_copy_lto_debug_sections (inobj, outfile, &err, > true); > + if (errmsg) > + { > + unlink_if_ordinary (outfile); > + return false; > + } > + > + simple_object_release_read (inobj); > + close (infd); > + > + /* Open the file we just created for some adjustments. > + The simple_object code can't do this, so we do it manually. */ > + FILE *outfd = fopen (outfile, "r+b"); > + if (!outfd) > + return false; > + > + Elf64_Ehdr ehdr; > + if (fread (&ehdr, sizeof (ehdr), 1, outfd) != 1) > + { > + fclose (outfd); > + return true; > + } > + > + /* We only support host relocations of x86_64, for now. */ > + gcc_assert (ehdr.e_machine == EM_X86_64); > + > + /* Patch the correct elf architecture flag into the file. */ > + ehdr.e_ident[7] = ELFOSABI_AMDGPU_HSA; > + ehdr.e_ident[8] = ELFABIVERSION_AMDGPU_HSA; > + ehdr.e_type = ET_REL; > + ehdr.e_machine = EM_AMDGPU; > + ehdr.e_flags = elf_arch; > + > + /* Load the section headers so we can walk them later. */ > + Elf64_Shdr *sections = (Elf64_Shdr *)xmalloc (sizeof (Elf64_Shdr) > + * ehdr.e_shnum); > + if (fseek (outfd, ehdr.e_shoff, SEEK_SET) == -1 > + || fread (sections, sizeof (Elf64_Shdr), ehdr.e_shnum, > + outfd) != ehdr.e_shnum) > + { > + free (sections); > + fclose (outfd); > + return true; > + } > + > + /* Convert the host relocations to target relocations. */ > + for (int i = 0; i < ehdr.e_shnum; i++) > + { > + if (sections[i].sh_type != SHT_RELA) > + continue; > + > + char *data = (char *)xmalloc (sections[i].sh_size); > + if (fseek (outfd, sections[i].sh_offset, SEEK_SET) == -1 > + || fread (data, sections[i].sh_size, 1, outfd) != 1) > + { > + free (data); > + continue; > + } > + > + for (size_t offset = 0; > + offset < sections[i].sh_size; > + offset += sections[i].sh_entsize) > + { > + Elf64_Rela *reloc = (Elf64_Rela *) (data + offset); > + > + /* Map the host relocations to GCN relocations. > + Only relocations that can appear in DWARF need be handled. */ > + switch (ELF64_R_TYPE (reloc->r_info)) > + { > + case R_X86_64_32: > + case R_X86_64_32S: > + reloc->r_info = R_AMDGPU_ABS32; > + break; > + case R_X86_64_PC32: > + reloc->r_info = R_AMDGPU_REL32; > + break; > + case R_X86_64_PC64: > + reloc->r_info = R_AMDGPU_REL64; > + break; > + case R_X86_64_64: > + reloc->r_info = R_AMDGPU_ABS64; > + break; > + case R_X86_64_RELATIVE: > + reloc->r_info = R_AMDGPU_RELATIVE64; > + break; > + default: > + gcc_unreachable (); > + } > + } > + > + /* Write back our relocation changes. */ > + if (fseek (outfd, sections[i].sh_offset, SEEK_SET) != -1) > + fwrite (data, sections[i].sh_size, 1, outfd); > + > + free (data); > + } > + > + /* Weaken any global undefined symbols that would pull in unwanted > + objects. */ > + for (int i = 0; i < ehdr.e_shnum; i++) > + { > + if (sections[i].sh_type != SHT_SYMTAB) > + continue; > + > + char *data = (char *)xmalloc (sections[i].sh_size); > + if (fseek (outfd, sections[i].sh_offset, SEEK_SET) == -1 > + || fread (data, sections[i].sh_size, 1, outfd) != 1) > + { > + free (data); > + continue; > + } > + > + for (size_t offset = 0; > + offset < sections[i].sh_size; > + offset += sections[i].sh_entsize) > + { > + Elf64_Sym *sym = (Elf64_Sym *) (data + offset); > + int type = ELF64_ST_TYPE (sym->st_info); > + int bind = ELF64_ST_BIND (sym->st_info); > + > + if (bind == STB_GLOBAL && sym->st_shndx == 0) > + sym->st_info = ELF64_ST_INFO (STB_WEAK, type); > + } > + > + /* Write back our symbol changes. */ > + if (fseek (outfd, sections[i].sh_offset, SEEK_SET) != -1) > + fwrite (data, sections[i].sh_size, 1, outfd); > + > + free (data); > + } > + free (sections); > + > + /* Write back our header changes. */ > + rewind (outfd); > + fwrite (&ehdr, sizeof (ehdr), 1, outfd); > + > + fclose (outfd); > + return true; > +} > + > /* Parse an input assembler file, extract the offload tables etc., > and output (1) the assembler code, minus the tables (which can contain > problematic relocations), and (2) a C file with the offload tables > @@ -538,9 +734,15 @@ main (int argc, char **argv) > FILE *cfile = stdout; > const char *outname = 0; > > + const char *gcn_s1_name; > + const char *gcn_s2_name; > + const char *gcn_o_name; > + const char *gcn_cfile_name; > + > progname = "mkoffload"; > diagnostic_initialize (global_dc, 0); > > + obstack_init (&files_to_cleanup); > if (atexit (mkoffload_cleanup) != 0) > fatal_error (input_location, "atexit failed"); > > @@ -632,7 +834,14 @@ main (int argc, char **argv) > else if (strcmp (argv[i], "-dumpbase") == 0 > && i + 1 < argc) > dumppfx = argv[++i]; > + else if (strcmp (argv[i], "-march=fiji") == 0) > + elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803; > + else if (strcmp (argv[i], "-march=gfx900") == 0) > + elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX900; > + else if (strcmp (argv[i], "-march=gfx906") == 0) > + elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX906; > } > + > if (!(fopenacc ^ fopenmp)) > fatal_error (input_location, "either -fopenacc or -fopenmp must be set"); > > @@ -693,6 +902,10 @@ main (int argc, char **argv) > gcn_o_name = make_temp_file (".mkoffload.hsaco"); > gcn_cfile_name = make_temp_file (".c"); > } > + obstack_ptr_grow (&files_to_cleanup, gcn_s1_name); > + obstack_ptr_grow (&files_to_cleanup, gcn_s2_name); > + obstack_ptr_grow (&files_to_cleanup, gcn_o_name); > + obstack_ptr_grow (&files_to_cleanup, gcn_cfile_name); > > obstack_ptr_grow (&cc_argv_obstack, "-dumpdir"); > obstack_ptr_grow (&cc_argv_obstack, ""); > @@ -710,6 +923,39 @@ main (int argc, char **argv) > struct obstack ld_argv_obstack; > obstack_init (&ld_argv_obstack); > obstack_ptr_grow (&ld_argv_obstack, driver); > + > + /* Extract early-debug information from the input objects. > + This loop finds all the inputs that end ".o" and aren't the output. */ > + int dbgcount = 0; > + for (int ix = 1; ix != argc; ix++) > + { > + if (!strcmp (argv[ix], "-o") && ix + 1 != argc) > + ++ix; > + else > + { > + if (strcmp (argv[ix] + strlen(argv[ix]) - 2, ".o") == 0) > + { > + char *dbgobj; > + if (save_temps) > + { > + char buf[10]; > + sprintf (buf, "%d", dbgcount++); > + dbgobj = concat (dumppfx, ".mkoffload.dbg", buf, ".o", NULL); > + } > + else > + dbgobj = make_temp_file (".mkoffload.dbg.o"); > + > + /* If the copy fails then just ignore it. */ > + if (copy_early_debug_info (argv[ix], dbgobj)) > + { > + obstack_ptr_grow (&ld_argv_obstack, dbgobj); > + obstack_ptr_grow (&files_to_cleanup, dbgobj); > + } > + else > + free (dbgobj); > + } > + } > + } > obstack_ptr_grow (&ld_argv_obstack, gcn_s2_name); > obstack_ptr_grow (&ld_argv_obstack, "-lgomp"); > ----------------- Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander Walter