https://gcc.gnu.org/g:ed6690a0ca911138abd4d707510fd03ef188a28b
commit r15-382-ged6690a0ca911138abd4d707510fd03ef188a28b Author: Mark Harmstone <m...@harmstone.com> Date: Sat May 11 08:15:43 2024 -0600 [PATCH v2 2/4] Output file checksums in CodeView section Outputs the file name and MD5 hash of the main source file into the CodeView .debug$S section, along with that of any #include'd files. gcc/ * dwarf2codeview.cc (DEBUG_S_STRINGTABLE): Define. (DEBUG_S_FILECHKSMS, CHKSUM_TYPE_MD5, HASH_SIZE): Likewise. (codeview_string, codeview_source_file): New structures. (struct string_hasher): New class for codeview_string hashing. (files, last_file, num_files, string_offset): New variables. (strings_htab, strings, last_string): Likewise. (add_string, codeview_start_source_file): New functions. (write_strings_table, write_source_files): Likewise. (codeview_debug_finish): Call new functions. * dwarf2codeview.h (codeview_start_source_file): Prototype. * dwarf2out.cc (dwarf2out_start_source_file): Handle codeview. Diff: --- gcc/dwarf2codeview.cc | 254 ++++++++++++++++++++++++++++++++++++++++++++++++++ gcc/dwarf2codeview.h | 1 + gcc/dwarf2out.cc | 5 + 3 files changed, 260 insertions(+) diff --git a/gcc/dwarf2codeview.cc b/gcc/dwarf2codeview.cc index f08f5d55ad7c..da8315310b50 100644 --- a/gcc/dwarf2codeview.cc +++ b/gcc/dwarf2codeview.cc @@ -39,6 +39,257 @@ along with GCC; see the file COPYING3. 
If not see #define CV_SIGNATURE_C13 4 +#define DEBUG_S_STRINGTABLE 0xf3 +#define DEBUG_S_FILECHKSMS 0xf4 + +#define CHKSUM_TYPE_MD5 1 + +#define HASH_SIZE 16 + +struct codeview_string +{ + codeview_string *next; + uint32_t offset; + char *string; +}; + +struct string_hasher : free_ptr_hash <struct codeview_string> +{ + typedef const char *compare_type; + + static hashval_t hash (const codeview_string *x) + { + return htab_hash_string (x->string); + } + + static bool equal (const codeview_string *x, const char *y) + { + return !strcmp (x->string, y); + } + + static void mark_empty (codeview_string *x) + { + if (x->string) + { + free (x->string); + x->string = NULL; + } + } + + static void remove (codeview_string *&x) + { + free (x->string); + } +}; + +struct codeview_source_file +{ + codeview_source_file *next; + unsigned int file_num; + uint32_t string_offset; + char *filename; + uint8_t hash[HASH_SIZE]; +}; + +static codeview_source_file *files, *last_file; +static unsigned int num_files; +static uint32_t string_offset = 1; +static hash_table<string_hasher> *strings_htab; +static codeview_string *strings, *last_string; + +/* Adds string to the string table, returning its offset. If already present, + this returns the offset of the existing string. 
*/ + +static uint32_t +add_string (const char *string) +{ + codeview_string **slot; + codeview_string *s; + size_t len; + + if (!strings_htab) + strings_htab = new hash_table<string_hasher> (10); + + slot = strings_htab->find_slot_with_hash (string, htab_hash_string (string), + INSERT); + + if (*slot) + return (*slot)->offset; + + s = (codeview_string *) xmalloc (sizeof (codeview_string)); + len = strlen (string); + + s->next = NULL; + + s->offset = string_offset; + string_offset += len + 1; + + s->string = xstrdup (string); + + if (last_string) + last_string->next = s; + else + strings = s; + + last_string = s; + + *slot = s; + + return s->offset; +} + +/* A new source file has been encountered - record the details and calculate + its hash. */ + +void +codeview_start_source_file (const char *filename) +{ + codeview_source_file *sf; + char *path; + uint32_t string_offset; + FILE *f; + + path = lrealpath (filename); + string_offset = add_string (path); + free (path); + + sf = files; + while (sf) + { + if (sf->string_offset == string_offset) + return; + + sf = sf->next; + } + + sf = (codeview_source_file *) xmalloc (sizeof (codeview_source_file)); + sf->next = NULL; + sf->file_num = num_files; + sf->string_offset = string_offset; + sf->filename = xstrdup (filename); + + f = fopen (filename, "r"); + if (!f) + internal_error ("could not open %s for reading", filename); + + if (md5_stream (f, sf->hash)) + { + fclose (f); + internal_error ("md5_stream failed"); + } + + fclose (f); + + if (last_file) + last_file->next = sf; + else + files = sf; + + last_file = sf; + num_files++; +} + +/* Write out the strings table into the .debug$S section. The linker will + parse this, and handle the deduplication and hashing for all the object + files. 
*/ + +static void +write_strings_table (void) +{ + codeview_string *string; + + fputs (integer_asm_op (4, false), asm_out_file); + fprint_whex (asm_out_file, DEBUG_S_STRINGTABLE); + putc ('\n', asm_out_file); + + fputs (integer_asm_op (4, false), asm_out_file); + asm_fprintf (asm_out_file, "%LLcv_strings_end - %LLcv_strings_start\n"); + + asm_fprintf (asm_out_file, "%LLcv_strings_start:\n"); + + /* The first entry is always an empty string. */ + fputs (integer_asm_op (1, false), asm_out_file); + fprint_whex (asm_out_file, 0); + putc ('\n', asm_out_file); + + string = strings; + while (string) + { + ASM_OUTPUT_ASCII (asm_out_file, string->string, + strlen (string->string) + 1); + + string = string->next; + } + + delete strings_htab; + + asm_fprintf (asm_out_file, "%LLcv_strings_end:\n"); + + ASM_OUTPUT_ALIGN (asm_out_file, 2); +} + +/* Write out the file checksums data into the .debug$S section. */ + +static void +write_source_files (void) +{ + fputs (integer_asm_op (4, false), asm_out_file); + fprint_whex (asm_out_file, DEBUG_S_FILECHKSMS); + putc ('\n', asm_out_file); + + fputs (integer_asm_op (4, false), asm_out_file); + asm_fprintf (asm_out_file, + "%LLcv_filechksms_end - %LLcv_filechksms_start\n"); + + asm_fprintf (asm_out_file, "%LLcv_filechksms_start:\n"); + + while (files) + { + codeview_source_file *next = files->next; + + /* This is struct file_checksum in binutils, or filedata in Microsoft's + dumpsym7.cpp: + + struct file_checksum + { + uint32_t file_id; + uint8_t checksum_length; + uint8_t checksum_type; + } ATTRIBUTE_PACKED; + + followed then by the bytes of the hash, padded to the next 4 bytes. + file_id here is actually the offset in the strings table. 
*/ + + fputs (integer_asm_op (4, false), asm_out_file); + fprint_whex (asm_out_file, files->string_offset); + putc ('\n', asm_out_file); + + fputs (integer_asm_op (1, false), asm_out_file); + fprint_whex (asm_out_file, HASH_SIZE); + putc ('\n', asm_out_file); + + fputs (integer_asm_op (1, false), asm_out_file); + fprint_whex (asm_out_file, CHKSUM_TYPE_MD5); + putc ('\n', asm_out_file); + + for (unsigned int i = 0; i < HASH_SIZE; i++) + { + fputs (integer_asm_op (1, false), asm_out_file); + fprint_whex (asm_out_file, files->hash[i]); + putc ('\n', asm_out_file); + } + + ASM_OUTPUT_ALIGN (asm_out_file, 2); + + free (files->filename); + free (files); + + files = next; + } + + asm_fprintf (asm_out_file, "%LLcv_filechksms_end:\n"); +} + /* Finish CodeView debug info emission. */ void @@ -49,6 +300,9 @@ codeview_debug_finish (void) fputs (integer_asm_op (4, false), asm_out_file); fprint_whex (asm_out_file, CV_SIGNATURE_C13); putc ('\n', asm_out_file); + + write_strings_table (); + write_source_files (); } #endif diff --git a/gcc/dwarf2codeview.h b/gcc/dwarf2codeview.h index efda148eb498..e2d732bb9b6a 100644 --- a/gcc/dwarf2codeview.h +++ b/gcc/dwarf2codeview.h @@ -26,5 +26,6 @@ along with GCC; see the file COPYING3. If not see /* Debug Format Interface. Used in dwarf2out.cc. */ extern void codeview_debug_finish (void); +extern void codeview_start_source_file (const char *); #endif /* GCC_DWARF2CODEVIEW_H */ diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc index f7e9f90e2746..43ef714b75e5 100644 --- a/gcc/dwarf2out.cc +++ b/gcc/dwarf2out.cc @@ -28889,6 +28889,11 @@ dwarf2out_set_ignored_loc (unsigned int line, unsigned int column, static void dwarf2out_start_source_file (unsigned int lineno, const char *filename) { +#ifdef CODEVIEW_DEBUGGING_INFO + if (codeview_debuginfo_p ()) + codeview_start_source_file (filename); +#endif + if (debug_info_level >= DINFO_LEVEL_VERBOSE) { macinfo_entry e;