https://gcc.gnu.org/g:ed6690a0ca911138abd4d707510fd03ef188a28b

commit r15-382-ged6690a0ca911138abd4d707510fd03ef188a28b
Author: Mark Harmstone <m...@harmstone.com>
Date:   Sat May 11 08:15:43 2024 -0600

    [PATCH v2 2/4] Output file checksums in CodeView section
    
    Outputs the file name and MD5 hash of the main source file into the
    CodeView .debug$S section, along with that of any #include'd files.
    
    gcc/
            * dwarf2codeview.cc (DEBUG_S_STRINGTABLE): Define.
            (DEBUG_S_FILECHKSMS, CHKSUM_TYPE_MD5, HASH_SIZE): Likewise.
            (codeview_string, codeview_source_file): New structures.
            (struct string_hasher): New class for codeview_string hashing.
            (files, last_file, num_files, string_offset): New variables.
            (strings_htab, strings, last_string): Likewise.
            (add_string, codeview_start_source_file): New functions.
            (write_strings_table, write_source_files): Likewise.
            (codeview_debug_finish): Call new functions.
            * dwarf2codeview.h (codeview_start_source_file): Prototype.
            * dwarf2out.cc (dwarf2out_start_source_file): Handle codeview.

Diff:
---
 gcc/dwarf2codeview.cc | 254 ++++++++++++++++++++++++++++++++++++++++++++++++++
 gcc/dwarf2codeview.h  |   1 +
 gcc/dwarf2out.cc      |   5 +
 3 files changed, 260 insertions(+)

diff --git a/gcc/dwarf2codeview.cc b/gcc/dwarf2codeview.cc
index f08f5d55ad7c..da8315310b50 100644
--- a/gcc/dwarf2codeview.cc
+++ b/gcc/dwarf2codeview.cc
@@ -39,6 +39,257 @@ along with GCC; see the file COPYING3.  If not see
 
 #define CV_SIGNATURE_C13       4
 
+#define DEBUG_S_STRINGTABLE     0xf3  /* Tag of the strings table block.  */
+#define DEBUG_S_FILECHKSMS      0xf4  /* Tag of the file checksums block.  */
+
+#define CHKSUM_TYPE_MD5                1  /* checksum_type value for MD5.  */
+
+#define HASH_SIZE 16  /* Bytes stored and emitted per file hash.  */
+
+struct codeview_string  /* One entry of the CodeView strings table.  */
+{
+  codeview_string *next;  /* Next entry in insertion order, or NULL.  */
+  uint32_t offset;        /* Byte offset of this string in the table.  */
+  char *string;           /* Heap copy of the text (from xstrdup).  */
+};
+
+/* Hash traits for codeview_string nodes, keyed by their text.  The table
+   owns its entries: remove () releases both the text and the node.  */
+struct string_hasher : free_ptr_hash <struct codeview_string>
+{
+  typedef const char *compare_type;
+
+  static hashval_t hash (const codeview_string *x)
+  {
+    return htab_hash_string (x->string);
+  }
+
+  static bool equal (const codeview_string *x, const char *y)
+  {
+    return !strcmp (x->string, y);
+  }
+
+  static void mark_empty (codeview_string *x)
+  {
+    free (x->string);
+    x->string = NULL;
+  }
+
+  static void remove (codeview_string *&x)
+  {
+    free (x->string);
+    free (x);  /* Not freed otherwise: this hides free_ptr_hash::remove.  */
+  }
+};
+
+struct codeview_source_file  /* One source file with a checksum entry.  */
+{
+  codeview_source_file *next;  /* Next file in order of first appearance.  */
+  unsigned int file_num;       /* Value of num_files when this was added.  */
+  uint32_t string_offset;      /* Strings-table offset of the real path.  */
+  char *filename;              /* Heap copy of the name as passed in.  */
+  uint8_t hash[HASH_SIZE];     /* Digest filled in by md5_stream.  */
+};
+
+static codeview_source_file *files, *last_file;  /* List head and tail.  */
+static unsigned int num_files;  /* Count of files added so far.  */
+static uint32_t string_offset = 1;  /* Next offset; 0 is the empty string.  */
+static hash_table<string_hasher> *strings_htab;  /* Created on first use.  */
+static codeview_string *strings, *last_string;  /* List head and tail.  */
+
+/* Adds string to the string table, returning its offset.  If already present,
+   this returns the offset of the existing string.  */
+
+static uint32_t
+add_string (const char *string)
+{
+  /* The table is created lazily, the first time a string is added.  */
+  if (strings_htab == NULL)
+    strings_htab = new hash_table<string_hasher> (10);
+
+  hashval_t hash = htab_hash_string (string);
+  codeview_string **slot
+    = strings_htab->find_slot_with_hash (string, hash, INSERT);
+
+  /* An occupied slot means the string was seen before: reuse its offset.  */
+  if (*slot != NULL)
+    return (*slot)->offset;
+
+  /* Otherwise build a new node that owns a private copy of the text.  */
+  codeview_string *entry
+    = (codeview_string *) xmalloc (sizeof (codeview_string));
+  entry->string = xstrdup (string);
+  entry->next = NULL;
+
+  /* Reserve room for the text and its terminating NUL.  */
+  entry->offset = string_offset;
+  string_offset += strlen (string) + 1;
+
+  /* Append to the list so that strings are written in insertion order.  */
+  if (last_string == NULL)
+    strings = entry;
+  else
+    last_string->next = entry;
+  last_string = entry;
+
+  /* Fill the slot reserved by the lookup above.  */
+  *slot = entry;
+
+  return entry->offset;
+}
+
+/* A new source file has been encountered - record the details and calculate
+   its hash.  */
+
+void
+codeview_start_source_file (const char *filename)
+{
+  codeview_source_file *sf;
+  char *path;
+  uint32_t path_offset;
+  FILE *f;
+
+  /* Files are identified by the strings-table offset of their real path.  */
+  path = lrealpath (filename);
+  path_offset = add_string (path);
+  free (path);
+
+  /* Nothing to do if this file has already been recorded.  */
+  for (sf = files; sf; sf = sf->next)
+    if (sf->string_offset == path_offset)
+      return;
+
+  sf = (codeview_source_file *) xmalloc (sizeof (codeview_source_file));
+  sf->next = NULL;
+  sf->file_num = num_files;
+  sf->string_offset = path_offset;
+  sf->filename = xstrdup (filename);
+
+  /* Open in binary mode: the checksum must cover the file's exact bytes, and
+     text mode on Windows hosts would translate line endings first.  */
+  f = fopen (filename, "rb");
+  if (!f)
+    internal_error ("could not open %s for reading", filename);
+
+  if (md5_stream (f, sf->hash))
+    {
+      fclose (f);
+      internal_error ("md5_stream failed");
+    }
+
+  fclose (f);
+
+  /* Append the new record to the end of the file list.  */
+  if (last_file)
+    last_file->next = sf;
+  else
+    files = sf;
+
+  last_file = sf;
+  num_files++;
+}
+
+/* Write out the strings table into the .debug$S section.  The linker will
+   parse this, and handle the deduplication and hashing for all the object
+   files.  */
+
+static void
+write_strings_table (void)
+{
+  /* Header: a 4-byte DEBUG_S_STRINGTABLE tag, then a 4-byte length that the
+     assembler computes from the two labels bracketing the data.  */
+  fputs (integer_asm_op (4, false), asm_out_file);
+  fprint_whex (asm_out_file, DEBUG_S_STRINGTABLE);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (4, false), asm_out_file);
+  asm_fprintf (asm_out_file, "%LLcv_strings_end - %LLcv_strings_start\n");
+
+  asm_fprintf (asm_out_file, "%LLcv_strings_start:\n");
+
+  /* Offset 0 is reserved for the empty string, i.e. a lone NUL byte.  */
+  fputs (integer_asm_op (1, false), asm_out_file);
+  fprint_whex (asm_out_file, 0);
+  putc ('\n', asm_out_file);
+
+  /* Every other string follows in insertion order, NUL included.  */
+  for (codeview_string *entry = strings; entry; entry = entry->next)
+    {
+      ASM_OUTPUT_ASCII (asm_out_file, entry->string,
+                        strlen (entry->string) + 1);
+    }
+
+  /* All strings are written; the lookup table can go.  */
+  delete strings_htab;
+
+  asm_fprintf (asm_out_file, "%LLcv_strings_end:\n");
+
+  /* Align whatever follows to a 4-byte boundary.  */
+  ASM_OUTPUT_ALIGN (asm_out_file, 2);
+}
+
+/* Write out the file checksums data into the .debug$S section.  */
+
+static void
+write_source_files (void)
+{
+  fputs (integer_asm_op (4, false), asm_out_file);  /* 4-byte block tag.  */
+  fprint_whex (asm_out_file, DEBUG_S_FILECHKSMS);
+  putc ('\n', asm_out_file);
+
+  fputs (integer_asm_op (4, false), asm_out_file);  /* 4-byte length.  */
+  asm_fprintf (asm_out_file,
+              "%LLcv_filechksms_end - %LLcv_filechksms_start\n");
+
+  asm_fprintf (asm_out_file, "%LLcv_filechksms_start:\n");
+
+  while (files)  /* Emits and frees each record; FILES ends up NULL.  */
+    {
+      codeview_source_file *next = files->next;  /* Saved before free.  */
+
+      /* This is struct file_checksum in binutils, or filedata in Microsoft's
+         dumpsym7.cpp:
+
+         struct file_checksum
+         {
+           uint32_t file_id;
+           uint8_t checksum_length;
+           uint8_t checksum_type;
+         } ATTRIBUTE_PACKED;
+
+         followed then by the bytes of the hash, padded to the next 4 bytes.
+         file_id here is actually the offset in the strings table.  */
+
+      fputs (integer_asm_op (4, false), asm_out_file);  /* file_id.  */
+      fprint_whex (asm_out_file, files->string_offset);
+      putc ('\n', asm_out_file);
+
+      fputs (integer_asm_op (1, false), asm_out_file);  /* checksum_length.  */
+      fprint_whex (asm_out_file, HASH_SIZE);
+      putc ('\n', asm_out_file);
+
+      fputs (integer_asm_op (1, false), asm_out_file);  /* checksum_type.  */
+      fprint_whex (asm_out_file, CHKSUM_TYPE_MD5);
+      putc ('\n', asm_out_file);
+
+      for (unsigned int i = 0; i < HASH_SIZE; i++)  /* One byte per line.  */
+       {
+         fputs (integer_asm_op (1, false), asm_out_file);
+         fprint_whex (asm_out_file, files->hash[i]);
+         putc ('\n', asm_out_file);
+       }
+
+      ASM_OUTPUT_ALIGN (asm_out_file, 2);  /* Pad record to 4 bytes.  */
+
+      free (files->filename);  /* The record owns its filename copy.  */
+      free (files);
+
+      files = next;
+    }
+
+  asm_fprintf (asm_out_file, "%LLcv_filechksms_end:\n");
+}
+
 /* Finish CodeView debug info emission.  */
 
 void
@@ -49,6 +300,9 @@ codeview_debug_finish (void)
   fputs (integer_asm_op (4, false), asm_out_file);
   fprint_whex (asm_out_file, CV_SIGNATURE_C13);
   putc ('\n', asm_out_file);
+
+  write_strings_table ();
+  write_source_files ();
 }
 
 #endif
diff --git a/gcc/dwarf2codeview.h b/gcc/dwarf2codeview.h
index efda148eb498..e2d732bb9b6a 100644
--- a/gcc/dwarf2codeview.h
+++ b/gcc/dwarf2codeview.h
@@ -26,5 +26,6 @@ along with GCC; see the file COPYING3.  If not see
 /* Debug Format Interface.  Used in dwarf2out.cc.  */
 
 extern void codeview_debug_finish (void);
+extern void codeview_start_source_file (const char *);
 
 #endif /* GCC_DWARF2CODEVIEW_H */
diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index f7e9f90e2746..43ef714b75e5 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -28889,6 +28889,11 @@ dwarf2out_set_ignored_loc (unsigned int line, unsigned int column,
 static void
 dwarf2out_start_source_file (unsigned int lineno, const char *filename)
 {
+#ifdef CODEVIEW_DEBUGGING_INFO
+  if (codeview_debuginfo_p ())
+    codeview_start_source_file (filename);
+#endif
+
   if (debug_info_level >= DINFO_LEVEL_VERBOSE)
     {
       macinfo_entry e;

Reply via email to