Author: David Spickett
Date: 2024-05-10T09:20:48+01:00
New Revision: a76518cadc5eaa6b6d07334e2b5bc08382aabe49

URL: 
https://github.com/llvm/llvm-project/commit/a76518cadc5eaa6b6d07334e2b5bc08382aabe49
DIFF: 
https://github.com/llvm/llvm-project/commit/a76518cadc5eaa6b6d07334e2b5bc08382aabe49.diff

LOG: [lldb][ELF] Return address class map changes from symbol table parsing 
methods (#91585)

Instead of updating the member of the ObjectFileELF instance. This means
that if one object file asks another to parse the symbol table, that
first object's can update its address class map with the same changes
that the other object did.

(I'm not returning a reference to the other object's m_address_class_map
member because there may be other things in there not related to the
symbol table being parsed)

This will fix the code added in
https://github.com/llvm/llvm-project/pull/90622 which broke the test
`Expr/TestStringLiteralExpr.test` on 32 bit Arm Linux.

This happened because we had the program file, then asked for a better
object file, which returned the same program file again. This creates a
second ObjectFileELF for the same file, so when we tell the second
instance to parse the symbol table it actually calls into the first
instance, leaving the address class map of the second instance empty.

Which caused us to put an Arm breakpoint instuction at a Thumb return
address and broke the ability to call mmap.

Added: 
    

Modified: 
    lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
    lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h

Removed: 
    


################################################################################
diff  --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp 
b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index 1646ee9aa34a6..d88f2d0830192 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -2060,13 +2060,17 @@ static char FindArmAarch64MappingSymbol(const char 
*symbol_name) {
 #define IS_MICROMIPS(ST_OTHER) (((ST_OTHER)&STO_MIPS_ISA) == STO_MICROMIPS)
 
 // private
-unsigned ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
-                                     SectionList *section_list,
-                                     const size_t num_symbols,
-                                     const DataExtractor &symtab_data,
-                                     const DataExtractor &strtab_data) {
+std::pair<unsigned, ObjectFileELF::FileAddressToAddressClassMap>
+ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
+                            SectionList *section_list, const size_t 
num_symbols,
+                            const DataExtractor &symtab_data,
+                            const DataExtractor &strtab_data) {
   ELFSymbol symbol;
   lldb::offset_t offset = 0;
+  // The changes these symbols would make to the class map. We will also update
+  // m_address_class_map but need to tell the caller what changed because the
+  // caller may be another object file.
+  FileAddressToAddressClassMap address_class_map;
 
   static ConstString text_section_name(".text");
   static ConstString init_section_name(".init");
@@ -2213,18 +2217,18 @@ unsigned ObjectFileELF::ParseSymbols(Symtab *symtab, 
user_id_t start_id,
             switch (mapping_symbol) {
             case 'a':
               // $a[.<any>]* - marks an ARM instruction sequence
-              m_address_class_map[symbol.st_value] = AddressClass::eCode;
+              address_class_map[symbol.st_value] = AddressClass::eCode;
               break;
             case 'b':
             case 't':
               // $b[.<any>]* - marks a THUMB BL instruction sequence
               // $t[.<any>]* - marks a THUMB instruction sequence
-              m_address_class_map[symbol.st_value] =
+              address_class_map[symbol.st_value] =
                   AddressClass::eCodeAlternateISA;
               break;
             case 'd':
               // $d[.<any>]* - marks a data item sequence (e.g. lit pool)
-              m_address_class_map[symbol.st_value] = AddressClass::eData;
+              address_class_map[symbol.st_value] = AddressClass::eData;
               break;
             }
           }
@@ -2238,11 +2242,11 @@ unsigned ObjectFileELF::ParseSymbols(Symtab *symtab, 
user_id_t start_id,
             switch (mapping_symbol) {
             case 'x':
               // $x[.<any>]* - marks an A64 instruction sequence
-              m_address_class_map[symbol.st_value] = AddressClass::eCode;
+              address_class_map[symbol.st_value] = AddressClass::eCode;
               break;
             case 'd':
               // $d[.<any>]* - marks a data item sequence (e.g. lit pool)
-              m_address_class_map[symbol.st_value] = AddressClass::eData;
+              address_class_map[symbol.st_value] = AddressClass::eData;
               break;
             }
           }
@@ -2260,11 +2264,11 @@ unsigned ObjectFileELF::ParseSymbols(Symtab *symtab, 
user_id_t start_id,
             // conjunction with symbol.st_value to produce the final
             // symbol_value that we store in the symtab.
             symbol_value_offset = -1;
-            m_address_class_map[symbol.st_value ^ 1] =
+            address_class_map[symbol.st_value ^ 1] =
                 AddressClass::eCodeAlternateISA;
           } else {
             // This address is ARM
-            m_address_class_map[symbol.st_value] = AddressClass::eCode;
+            address_class_map[symbol.st_value] = AddressClass::eCode;
           }
         }
       }
@@ -2285,17 +2289,17 @@ unsigned ObjectFileELF::ParseSymbols(Symtab *symtab, 
user_id_t start_id,
       */
       if (arch.IsMIPS()) {
         if (IS_MICROMIPS(symbol.st_other))
-          m_address_class_map[symbol.st_value] = 
AddressClass::eCodeAlternateISA;
+          address_class_map[symbol.st_value] = AddressClass::eCodeAlternateISA;
         else if ((symbol.st_value & 1) && (symbol_type == eSymbolTypeCode)) {
           symbol.st_value = symbol.st_value & (~1ull);
-          m_address_class_map[symbol.st_value] = 
AddressClass::eCodeAlternateISA;
+          address_class_map[symbol.st_value] = AddressClass::eCodeAlternateISA;
         } else {
           if (symbol_type == eSymbolTypeCode)
-            m_address_class_map[symbol.st_value] = AddressClass::eCode;
+            address_class_map[symbol.st_value] = AddressClass::eCode;
           else if (symbol_type == eSymbolTypeData)
-            m_address_class_map[symbol.st_value] = AddressClass::eData;
+            address_class_map[symbol.st_value] = AddressClass::eData;
           else
-            m_address_class_map[symbol.st_value] = AddressClass::eUnknown;
+            address_class_map[symbol.st_value] = AddressClass::eUnknown;
         }
       }
     }
@@ -2392,24 +2396,33 @@ unsigned ObjectFileELF::ParseSymbols(Symtab *symtab, 
user_id_t start_id,
       dc_symbol.SetIsWeak(true);
     symtab->AddSymbol(dc_symbol);
   }
-  return i;
+
+  m_address_class_map.merge(address_class_map);
+  return {i, address_class_map};
 }
 
-unsigned ObjectFileELF::ParseSymbolTable(Symtab *symbol_table,
-                                         user_id_t start_id,
-                                         lldb_private::Section *symtab) {
+std::pair<unsigned, ObjectFileELF::FileAddressToAddressClassMap>
+ObjectFileELF::ParseSymbolTable(Symtab *symbol_table, user_id_t start_id,
+                                lldb_private::Section *symtab) {
   if (symtab->GetObjectFile() != this) {
     // If the symbol table section is owned by a 
diff erent object file, have it
     // do the parsing.
     ObjectFileELF *obj_file_elf =
         static_cast<ObjectFileELF *>(symtab->GetObjectFile());
-    return obj_file_elf->ParseSymbolTable(symbol_table, start_id, symtab);
+    auto [num_symbols, address_class_map] =
+        obj_file_elf->ParseSymbolTable(symbol_table, start_id, symtab);
+
+    // The other object file returned the changes it made to its address
+    // class map, make the same changes to ours.
+    m_address_class_map.merge(address_class_map);
+
+    return {num_symbols, address_class_map};
   }
 
   // Get section list for this object file.
   SectionList *section_list = m_sections_up.get();
   if (!section_list)
-    return 0;
+    return {};
 
   user_id_t symtab_id = symtab->GetID();
   const ELFSectionHeaderInfo *symtab_hdr = GetSectionHeaderByIndex(symtab_id);
@@ -2435,7 +2448,7 @@ unsigned ObjectFileELF::ParseSymbolTable(Symtab 
*symbol_table,
     }
   }
 
-  return 0;
+  return {0, {}};
 }
 
 size_t ObjectFileELF::ParseDynamicSymbols() {
@@ -2972,8 +2985,12 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
   // while the reverse is not necessarily true.
   Section *symtab =
       section_list->FindSectionByType(eSectionTypeELFSymbolTable, true).get();
-  if (symtab)
-    symbol_id += ParseSymbolTable(&lldb_symtab, symbol_id, symtab);
+  if (symtab) {
+    auto [num_symbols, address_class_map] =
+        ParseSymbolTable(&lldb_symtab, symbol_id, symtab);
+    m_address_class_map.merge(address_class_map);
+    symbol_id += num_symbols;
+  }
 
   // The symtab section is non-allocable and can be stripped, while the
   // .dynsym section which should always be always be there. To support the
@@ -2986,8 +3003,12 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
     Section *dynsym =
         section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
             .get();
-    if (dynsym)
-      symbol_id += ParseSymbolTable(&lldb_symtab, symbol_id, dynsym);
+    if (dynsym) {
+      auto [num_symbols, address_class_map] =
+          ParseSymbolTable(&lldb_symtab, symbol_id, dynsym);
+      symbol_id += num_symbols;
+      m_address_class_map.merge(address_class_map);
+    }
   }
 
   // DT_JMPREL

diff  --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h 
b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
index bc8e34981a9d8..716bbe01638f3 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
@@ -285,18 +285,19 @@ class ObjectFileELF : public lldb_private::ObjectFile {
 
   /// Populates the symbol table with all non-dynamic linker symbols.  This
   /// method will parse the symbols only once.  Returns the number of symbols
-  /// parsed.
-  unsigned ParseSymbolTable(lldb_private::Symtab *symbol_table,
-                            lldb::user_id_t start_id,
-                            lldb_private::Section *symtab);
+  /// parsed and a map of address types (used by targets like Arm that have
+  /// an alternative ISA mode like Thumb).
+  std::pair<unsigned, FileAddressToAddressClassMap>
+  ParseSymbolTable(lldb_private::Symtab *symbol_table, lldb::user_id_t 
start_id,
+                   lldb_private::Section *symtab);
 
   /// Helper routine for ParseSymbolTable().
-  unsigned ParseSymbols(lldb_private::Symtab *symbol_table,
-                        lldb::user_id_t start_id,
-                        lldb_private::SectionList *section_list,
-                        const size_t num_symbols,
-                        const lldb_private::DataExtractor &symtab_data,
-                        const lldb_private::DataExtractor &strtab_data);
+  std::pair<unsigned, FileAddressToAddressClassMap>
+  ParseSymbols(lldb_private::Symtab *symbol_table, lldb::user_id_t start_id,
+               lldb_private::SectionList *section_list,
+               const size_t num_symbols,
+               const lldb_private::DataExtractor &symtab_data,
+               const lldb_private::DataExtractor &strtab_data);
 
   /// Scans the relocation entries and adds a set of artificial symbols to the
   /// given symbol table for each PLT slot.  Returns the number of symbols


        
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to