Hi Andrew!
On 2020-07-16T16:06:49+0100, Andrew Stubbs <[email protected]> wrote:
> This patch adds debug support to mkoffload, similar to what happens in
> lto-wrapper.
Ah, good, it's not as invasive/convoluted as I'd assumed from the verbal
description you'd given.
> Unlike lto-wrapper, we must deal with mismatched architectures and
> mismatched program scope. These are fixed up using manual ELF patching
> because there's no useful support in simple_object (yet). Should this be
> something that other offload targets want to do then that probably ought
> to be changed in future, but it isn't necessary yet.
ACK.
> This is enough to prevent rocgdb choking on malformed debug information
> and debug simple offload testcases
:-)
> it may be that further adjustment is
> needed.
>
> I have not attempted to make similar changes to the other instance of
> mkoffload
ACK.
> as nvptx has no use for debug info.
Specifically, nvptx only supports 'DWARF2_LINENO_DEBUGGING_INFO' (and
that's mostly untested, too).
> --- a/gcc/config/gcn/mkoffload.c
> +++ b/gcc/config/gcn/mkoffload.c
> @@ -33,31 +33,53 @@
> #include <libgen.h>
> #include "collect-utils.h"
> #include "gomp-constants.h"
> +#include "simple-object.h"
> +#include "elf.h"
> +
> +/* These probably won't be in elf.h for a while. */
> +#ifndef EM_AMDGPU
Nope, it already is. ;-P
> +#define EM_AMDGPU 0xe0;
> +
> +#define ELFOSABI_AMDGPU_HSA 64
> +#define ELFABIVERSION_AMDGPU_HSA 1
> +
> +#define EF_AMDGPU_MACH_AMDGCN_GFX803 0x2a
> +#define EF_AMDGPU_MACH_AMDGCN_GFX900 0x2c
> +#define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f
> +
> +#define R_AMDGPU_NONE 0
> +#define R_AMDGPU_ABS32_LO 1 /* (S + A) & 0xFFFFFFFF */
> +#define R_AMDGPU_ABS32_HI 2 /* (S + A) >> 32 */
> +#define R_AMDGPU_ABS64 3 /* S + A */
> +#define R_AMDGPU_REL32 4 /* S + A - P */
> +#define R_AMDGPU_REL64 5 /* S + A - P */
> +#define R_AMDGPU_ABS32 6 /* S + A */
> +#define R_AMDGPU_GOTPCREL 7 /* G + GOT + A - P */
> +#define R_AMDGPU_GOTPCREL32_LO 8 /* (G + GOT + A - P) &
> 0xFFFFFFFF */
> +#define R_AMDGPU_GOTPCREL32_HI 9 /* (G + GOT + A - P) >> 32 */
> +#define R_AMDGPU_REL32_LO 10 /* (S + A - P) & 0xFFFFFFFF */
> +#define R_AMDGPU_REL32_HI 11 /* (S + A - P) >> 32 */
> +#define reserved 12
(That '#define reserved 12' doesn't look useful: it just turns the plain
identifier 'reserved' into a macro?)
> +#define R_AMDGPU_RELATIVE64 13 /* B + A */
> +#endif
The standard Ubuntu 18.04 '/usr/include/elf.h' as shipped by package
libc6-dev:amd64 in version 2.27-3ubuntu1 contains:
$ grep -n AMDGPU < /usr/include/elf.h
358:#define EM_AMDGPU 224 /* AMD GPU */
..., that is, 'EM_AMDGPU' is the only AMDGPU definition it provides, and
thus the build fails with:
[...]/source-gcc/gcc/config/gcn/mkoffload.c:72:21: error:
'EF_AMDGPU_MACH_AMDGCN_GFX803' was not declared in this scope
uint32_t elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803; // Default GPU
architecture.
^
[...]/source-gcc/gcc/config/gcn/mkoffload.c: In function 'bool
copy_early_debug_info(const char*, const char*)':
[...]/source-gcc/gcc/config/gcn/mkoffload.c:290:21: error:
'ELFOSABI_AMDGPU_HSA' was not declared in this scope
ehdr.e_ident[7] = ELFOSABI_AMDGPU_HSA;
^
[...]/source-gcc/gcc/config/gcn/mkoffload.c:291:21: error:
'ELFABIVERSION_AMDGPU_HSA' was not declared in this scope
ehdr.e_ident[8] = ELFABIVERSION_AMDGPU_HSA;
^
[...]/source-gcc/gcc/config/gcn/mkoffload.c:334:24: error: 'R_AMDGPU_ABS32'
was not declared in this scope
reloc->r_info = R_AMDGPU_ABS32;
^
[...]
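That is, my 'elf.h' already defines the canary 'EM_AMDGPU', so the whole
'#ifndef EM_AMDGPU' block is skipped, but it doesn't provide any of the
other '#define's.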
> -/* Files to unlink. */
> -static const char *gcn_s1_name;
> -static const char *gcn_s2_name;
> -static const char *gcn_o_name;
> -static const char *gcn_cfile_name;
> static const char *gcn_dumpbase;
> +static struct obstack files_to_cleanup;
(Good idea; should do similar in the other 'mkoffload's.)
> +uint32_t elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803; // Default GPU
> architecture.
To make later maintenance easier, shouldn't this default be a '#define' (or
similar) placed next to where the GCC back end defines its default?
Grüße
Thomas
> /* Delete tempfiles. */
>
> void
> tool_cleanup (bool from_signal ATTRIBUTE_UNUSED)
> {
> - if (gcn_cfile_name)
> - maybe_unlink (gcn_cfile_name);
> - if (gcn_s1_name)
> - maybe_unlink (gcn_s1_name);
> - if (gcn_s2_name)
> - maybe_unlink (gcn_s2_name);
> - if (gcn_o_name)
> - maybe_unlink (gcn_o_name);
> + obstack_ptr_grow (&files_to_cleanup, NULL);
> + const char **files = XOBFINISH (&files_to_cleanup, const char **);
> + for (int i = 0; files[i]; i++)
> + maybe_unlink (files[i]);
> }
>
> static void
> @@ -204,6 +226,180 @@ access_check (const char *name, int mode)
> return access (name, mode);
> }
>
> +/* Copy the early-debug-info from the incoming LTO object to a new object
> + that will be linked into the output HSACO file. The host relocations
> + must be translated into GCN relocations, and any global undefined symbols
> + must be weakened (so as not to have the debug info try to pull in host
> + junk).
> +
> + Returns true if the file was created, false otherwise. */
> +
> +static bool
> +copy_early_debug_info (const char *infile, const char *outfile)
> +{
> + const char *errmsg;
> + int err;
> +
> + /* The simple_object code can handle extracting the debug sections.
> + This code is based on that in lto-wrapper.c. */
> + int infd = open (infile, O_RDONLY | O_BINARY);
> + if (infd == -1)
> + return false;
> + simple_object_read *inobj = simple_object_start_read (infd, 0,
> + "__GNU_LTO",
> + &errmsg, &err);
> + if (!inobj)
> + return false;
> +
> + off_t off, len;
> + if (simple_object_find_section (inobj, ".gnu.debuglto_.debug_info",
> + &off, &len, &errmsg, &err) != 1)
> + {
> + simple_object_release_read (inobj);
> + close (infd);
> + return false;
> + }
> +
> + errmsg = simple_object_copy_lto_debug_sections (inobj, outfile, &err,
> true);
> + if (errmsg)
> + {
> + unlink_if_ordinary (outfile);
> + return false;
> + }
> +
> + simple_object_release_read (inobj);
> + close (infd);
> +
> + /* Open the file we just created for some adjustments.
> + The simple_object code can't do this, so we do it manually. */
> + FILE *outfd = fopen (outfile, "r+b");
> + if (!outfd)
> + return false;
> +
> + Elf64_Ehdr ehdr;
> + if (fread (&ehdr, sizeof (ehdr), 1, outfd) != 1)
> + {
> + fclose (outfd);
> + return true;
> + }
> +
> + /* We only support host relocations of x86_64, for now. */
> + gcc_assert (ehdr.e_machine == EM_X86_64);
> +
> + /* Patch the correct elf architecture flag into the file. */
> + ehdr.e_ident[7] = ELFOSABI_AMDGPU_HSA;
> + ehdr.e_ident[8] = ELFABIVERSION_AMDGPU_HSA;
> + ehdr.e_type = ET_REL;
> + ehdr.e_machine = EM_AMDGPU;
> + ehdr.e_flags = elf_arch;
> +
> + /* Load the section headers so we can walk them later. */
> + Elf64_Shdr *sections = (Elf64_Shdr *)xmalloc (sizeof (Elf64_Shdr)
> + * ehdr.e_shnum);
> + if (fseek (outfd, ehdr.e_shoff, SEEK_SET) == -1
> + || fread (sections, sizeof (Elf64_Shdr), ehdr.e_shnum,
> + outfd) != ehdr.e_shnum)
> + {
> + free (sections);
> + fclose (outfd);
> + return true;
> + }
> +
> + /* Convert the host relocations to target relocations. */
> + for (int i = 0; i < ehdr.e_shnum; i++)
> + {
> + if (sections[i].sh_type != SHT_RELA)
> + continue;
> +
> + char *data = (char *)xmalloc (sections[i].sh_size);
> + if (fseek (outfd, sections[i].sh_offset, SEEK_SET) == -1
> + || fread (data, sections[i].sh_size, 1, outfd) != 1)
> + {
> + free (data);
> + continue;
> + }
> +
> + for (size_t offset = 0;
> + offset < sections[i].sh_size;
> + offset += sections[i].sh_entsize)
> + {
> + Elf64_Rela *reloc = (Elf64_Rela *) (data + offset);
> +
> + /* Map the host relocations to GCN relocations.
> + Only relocations that can appear in DWARF need be handled. */
> + switch (ELF64_R_TYPE (reloc->r_info))
> + {
> + case R_X86_64_32:
> + case R_X86_64_32S:
> + reloc->r_info = R_AMDGPU_ABS32;
> + break;
> + case R_X86_64_PC32:
> + reloc->r_info = R_AMDGPU_REL32;
> + break;
> + case R_X86_64_PC64:
> + reloc->r_info = R_AMDGPU_REL64;
> + break;
> + case R_X86_64_64:
> + reloc->r_info = R_AMDGPU_ABS64;
> + break;
> + case R_X86_64_RELATIVE:
> + reloc->r_info = R_AMDGPU_RELATIVE64;
> + break;
> + default:
> + gcc_unreachable ();
> + }
> + }
> +
> + /* Write back our relocation changes. */
> + if (fseek (outfd, sections[i].sh_offset, SEEK_SET) != -1)
> + fwrite (data, sections[i].sh_size, 1, outfd);
> +
> + free (data);
> + }
> +
> + /* Weaken any global undefined symbols that would pull in unwanted
> + objects. */
> + for (int i = 0; i < ehdr.e_shnum; i++)
> + {
> + if (sections[i].sh_type != SHT_SYMTAB)
> + continue;
> +
> + char *data = (char *)xmalloc (sections[i].sh_size);
> + if (fseek (outfd, sections[i].sh_offset, SEEK_SET) == -1
> + || fread (data, sections[i].sh_size, 1, outfd) != 1)
> + {
> + free (data);
> + continue;
> + }
> +
> + for (size_t offset = 0;
> + offset < sections[i].sh_size;
> + offset += sections[i].sh_entsize)
> + {
> + Elf64_Sym *sym = (Elf64_Sym *) (data + offset);
> + int type = ELF64_ST_TYPE (sym->st_info);
> + int bind = ELF64_ST_BIND (sym->st_info);
> +
> + if (bind == STB_GLOBAL && sym->st_shndx == 0)
> + sym->st_info = ELF64_ST_INFO (STB_WEAK, type);
> + }
> +
> + /* Write back our symbol changes. */
> + if (fseek (outfd, sections[i].sh_offset, SEEK_SET) != -1)
> + fwrite (data, sections[i].sh_size, 1, outfd);
> +
> + free (data);
> + }
> + free (sections);
> +
> + /* Write back our header changes. */
> + rewind (outfd);
> + fwrite (&ehdr, sizeof (ehdr), 1, outfd);
> +
> + fclose (outfd);
> + return true;
> +}
> +
> /* Parse an input assembler file, extract the offload tables etc.,
> and output (1) the assembler code, minus the tables (which can contain
> problematic relocations), and (2) a C file with the offload tables
> @@ -538,9 +734,15 @@ main (int argc, char **argv)
> FILE *cfile = stdout;
> const char *outname = 0;
>
> + const char *gcn_s1_name;
> + const char *gcn_s2_name;
> + const char *gcn_o_name;
> + const char *gcn_cfile_name;
> +
> progname = "mkoffload";
> diagnostic_initialize (global_dc, 0);
>
> + obstack_init (&files_to_cleanup);
> if (atexit (mkoffload_cleanup) != 0)
> fatal_error (input_location, "atexit failed");
>
> @@ -632,7 +834,14 @@ main (int argc, char **argv)
> else if (strcmp (argv[i], "-dumpbase") == 0
> && i + 1 < argc)
> dumppfx = argv[++i];
> + else if (strcmp (argv[i], "-march=fiji") == 0)
> + elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803;
> + else if (strcmp (argv[i], "-march=gfx900") == 0)
> + elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX900;
> + else if (strcmp (argv[i], "-march=gfx906") == 0)
> + elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX906;
> }
> +
> if (!(fopenacc ^ fopenmp))
> fatal_error (input_location, "either -fopenacc or -fopenmp must be set");
>
> @@ -693,6 +902,10 @@ main (int argc, char **argv)
> gcn_o_name = make_temp_file (".mkoffload.hsaco");
> gcn_cfile_name = make_temp_file (".c");
> }
> + obstack_ptr_grow (&files_to_cleanup, gcn_s1_name);
> + obstack_ptr_grow (&files_to_cleanup, gcn_s2_name);
> + obstack_ptr_grow (&files_to_cleanup, gcn_o_name);
> + obstack_ptr_grow (&files_to_cleanup, gcn_cfile_name);
>
> obstack_ptr_grow (&cc_argv_obstack, "-dumpdir");
> obstack_ptr_grow (&cc_argv_obstack, "");
> @@ -710,6 +923,39 @@ main (int argc, char **argv)
> struct obstack ld_argv_obstack;
> obstack_init (&ld_argv_obstack);
> obstack_ptr_grow (&ld_argv_obstack, driver);
> +
> + /* Extract early-debug information from the input objects.
> + This loop finds all the inputs that end ".o" and aren't the output. */
> + int dbgcount = 0;
> + for (int ix = 1; ix != argc; ix++)
> + {
> + if (!strcmp (argv[ix], "-o") && ix + 1 != argc)
> + ++ix;
> + else
> + {
> + if (strcmp (argv[ix] + strlen(argv[ix]) - 2, ".o") == 0)
> + {
> + char *dbgobj;
> + if (save_temps)
> + {
> + char buf[10];
> + sprintf (buf, "%d", dbgcount++);
> + dbgobj = concat (dumppfx, ".mkoffload.dbg", buf, ".o", NULL);
> + }
> + else
> + dbgobj = make_temp_file (".mkoffload.dbg.o");
> +
> + /* If the copy fails then just ignore it. */
> + if (copy_early_debug_info (argv[ix], dbgobj))
> + {
> + obstack_ptr_grow (&ld_argv_obstack, dbgobj);
> + obstack_ptr_grow (&files_to_cleanup, dbgobj);
> + }
> + else
> + free (dbgobj);
> + }
> + }
> + }
> obstack_ptr_grow (&ld_argv_obstack, gcn_s2_name);
> obstack_ptr_grow (&ld_argv_obstack, "-lgomp");
>
-----------------
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander
Walter