[PATCH] D156580: [FunctionImport] Reduce string duplication (NFC)

2023-08-04 Thread Teresa Johnson via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG65e57bbed06d: [FunctionImport] Reduce string duplication 
(NFC) (authored by tejohnson).

Changed prior to commit:
  https://reviews.llvm.org/D156580?vs=545303=547359#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D156580/new/

https://reviews.llvm.org/D156580

Files:
  clang/lib/CodeGen/BackendUtil.cpp
  llvm/include/llvm/LTO/LTO.h
  llvm/include/llvm/Transforms/IPO/FunctionImport.h
  llvm/lib/LTO/LTO.cpp
  llvm/lib/LTO/ThinLTOCodeGenerator.cpp
  llvm/lib/Transforms/IPO/FunctionImport.cpp
  llvm/tools/llvm-link/llvm-link.cpp

Index: llvm/tools/llvm-link/llvm-link.cpp
===
--- llvm/tools/llvm-link/llvm-link.cpp
+++ llvm/tools/llvm-link/llvm-link.cpp
@@ -323,6 +323,11 @@
   };
 
   ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader);
+  // Owns the filename strings used to key into the ImportList. Normally this is
+  // constructed from the index and the strings are owned by the index, however,
+  // since we are synthesizing this data structure from options we need a cache
+  // to own those strings.
+  StringSet<> FileNameStringCache;
   for (const auto  : Imports) {
 // Identify the requested function and its bitcode source file.
 size_t Idx = Import.find(':');
@@ -360,7 +365,8 @@
 if (Verbose)
   errs() << "Importing " << FunctionName << " from " << FileName << "\n";
 
-auto  = ImportList[FileName];
+auto  =
+ImportList[FileNameStringCache.insert(FileName).first->getKey()];
 Entry.insert(F->getGUID());
   }
   auto CachedModuleLoader = [&](StringRef Identifier) {
Index: llvm/lib/Transforms/IPO/FunctionImport.cpp
===
--- llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -272,7 +272,7 @@
   function_ref
   IsPrevailing;
   FunctionImporter::ImportMapTy 
-  StringMap *const ExportLists;
+  DenseMap *const ExportLists;
 
   bool shouldImportGlobal(const ValueInfo ) {
 const auto  = DefinedGVSummaries.find(VI.getGUID());
@@ -357,7 +357,7 @@
   function_ref
   IsPrevailing,
   FunctionImporter::ImportMapTy ,
-  StringMap *ExportLists)
+  DenseMap *ExportLists)
   : Index(Index), DefinedGVSummaries(DefinedGVSummaries),
 IsPrevailing(IsPrevailing), ImportList(ImportList),
 ExportLists(ExportLists) {}
@@ -403,7 +403,7 @@
 isPrevailing,
 SmallVectorImpl , GlobalsImporter ,
 FunctionImporter::ImportMapTy ,
-StringMap *ExportLists,
+DenseMap *ExportLists,
 FunctionImporter::ImportThresholdsTy ) {
   GVImporter.onImportingSummary(Summary);
   static int ImportCount = 0;
@@ -576,7 +576,7 @@
 isPrevailing,
 const ModuleSummaryIndex , StringRef ModName,
 FunctionImporter::ImportMapTy ,
-StringMap *ExportLists = nullptr) {
+DenseMap *ExportLists = nullptr) {
   // Worklist contains the list of function imported in this module, for which
   // we will analyse the callees and may import further down the callgraph.
   SmallVector Worklist;
@@ -671,10 +671,10 @@
 #endif
 
 #ifndef NDEBUG
-static bool
-checkVariableImport(const ModuleSummaryIndex ,
-StringMap ,
-StringMap ) {
+static bool checkVariableImport(
+const ModuleSummaryIndex ,
+DenseMap ,
+DenseMap ) {
 
   DenseSet FlattenedImports;
 
@@ -702,7 +702,7 @@
   for (auto  : ExportLists)
 for (auto  : ExportPerModule.second)
   if (!FlattenedImports.count(VI.getGUID()) &&
-  IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first(), VI))
+  IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first, VI))
 return false;
 
   return true;
@@ -712,19 +712,18 @@
 /// Compute all the import and export for every module using the Index.
 void llvm::ComputeCrossModuleImport(
 const ModuleSummaryIndex ,
-const StringMap ,
+const DenseMap ,
 function_ref
 isPrevailing,
-StringMap ,
-StringMap ) {
+DenseMap ,
+DenseMap ) {
   // For each module that has function defined, compute the import/export lists.
   for (const auto  : ModuleToDefinedGVSummaries) {
-auto  = ImportLists[DefinedGVSummaries.first()];
+auto  = ImportLists[DefinedGVSummaries.first];
 LLVM_DEBUG(dbgs() << "Computing import for Module '"
-  << DefinedGVSummaries.first() << "'\n");
+  << DefinedGVSummaries.first << "'\n");
 ComputeImportForModule(DefinedGVSummaries.second, isPrevailing, Index,
-   DefinedGVSummaries.first(), ImportList,
-   );
+   DefinedGVSummaries.first, ImportList, );
   }
 
   // When computing imports we only added the variables and 

[PATCH] D156580: [FunctionImport] Reduce string duplication (NFC)

2023-07-28 Thread Teresa Johnson via Phabricator via cfe-commits
tejohnson created this revision.
tejohnson added a reviewer: mtrofin.
Herald added subscribers: hoy, ormris, steven_wu, hiraditya.
Herald added a project: All.
tejohnson requested review of this revision.
Herald added projects: clang, LLVM.
Herald added a subscriber: cfe-commits.

The import/export maps, and the ModuleToDefinedGVSummaries map, are all
indexed by module paths, which are StringRef obtained from the module
summary index, which already has a data structure than owns these
strings (the ModulePathStringTable). Because these other maps are also
StringMap, which makes a copy of the string key, we were keeping
multiple extra copies of the module paths, leading to memory overhead.

Change these to DenseMap keyed by StringRef, and document that the
strings are owned by the index.

The only exception is the llvm-link tool which synthesizes an import list
from command line options, and I have added a string cache to maintain
ownership there.

I measured around 5% memory reduction in the thin link of a large
binary.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D156580

Files:
  clang/lib/CodeGen/BackendUtil.cpp
  llvm/include/llvm/LTO/LTO.h
  llvm/include/llvm/Transforms/IPO/FunctionImport.h
  llvm/lib/LTO/LTO.cpp
  llvm/lib/LTO/ThinLTOCodeGenerator.cpp
  llvm/lib/Transforms/IPO/FunctionImport.cpp
  llvm/tools/llvm-link/llvm-link.cpp

Index: llvm/tools/llvm-link/llvm-link.cpp
===
--- llvm/tools/llvm-link/llvm-link.cpp
+++ llvm/tools/llvm-link/llvm-link.cpp
@@ -323,6 +323,11 @@
   };
 
   ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader);
+  // Owns the filename strings used to key into the ImportList. Normally this is
+  // constructed from the index and the strings are owned by the index, however,
+  // since we are synthesizing this data structure from options we need a cache
+  // to own those strings.
+  StringSet<> FileNameStringCache;
   for (const auto  : Imports) {
 // Identify the requested function and its bitcode source file.
 size_t Idx = Import.find(':');
@@ -360,7 +365,8 @@
 if (Verbose)
   errs() << "Importing " << FunctionName << " from " << FileName << "\n";
 
-auto  = ImportList[FileName];
+auto  =
+ImportList[FileNameStringCache.insert(FileName).first->getKey()];
 Entry.insert(F->getGUID());
   }
   auto CachedModuleLoader = [&](StringRef Identifier) {
Index: llvm/lib/Transforms/IPO/FunctionImport.cpp
===
--- llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -272,7 +272,7 @@
   function_ref
   IsPrevailing;
   FunctionImporter::ImportMapTy 
-  StringMap *const ExportLists;
+  DenseMap *const ExportLists;
 
   bool shouldImportGlobal(const ValueInfo ) {
 const auto  = DefinedGVSummaries.find(VI.getGUID());
@@ -357,7 +357,7 @@
   function_ref
   IsPrevailing,
   FunctionImporter::ImportMapTy ,
-  StringMap *ExportLists)
+  DenseMap *ExportLists)
   : Index(Index), DefinedGVSummaries(DefinedGVSummaries),
 IsPrevailing(IsPrevailing), ImportList(ImportList),
 ExportLists(ExportLists) {}
@@ -403,7 +403,7 @@
 isPrevailing,
 SmallVectorImpl , GlobalsImporter ,
 FunctionImporter::ImportMapTy ,
-StringMap *ExportLists,
+DenseMap *ExportLists,
 FunctionImporter::ImportThresholdsTy ) {
   GVImporter.onImportingSummary(Summary);
   static int ImportCount = 0;
@@ -576,7 +576,7 @@
 isPrevailing,
 const ModuleSummaryIndex , StringRef ModName,
 FunctionImporter::ImportMapTy ,
-StringMap *ExportLists = nullptr) {
+DenseMap *ExportLists = nullptr) {
   // Worklist contains the list of function imported in this module, for which
   // we will analyse the callees and may import further down the callgraph.
   SmallVector Worklist;
@@ -671,10 +671,10 @@
 #endif
 
 #ifndef NDEBUG
-static bool
-checkVariableImport(const ModuleSummaryIndex ,
-StringMap ,
-StringMap ) {
+static bool checkVariableImport(
+const ModuleSummaryIndex ,
+DenseMap ,
+DenseMap ) {
 
   DenseSet FlattenedImports;
 
@@ -702,7 +702,7 @@
   for (auto  : ExportLists)
 for (auto  : ExportPerModule.second)
   if (!FlattenedImports.count(VI.getGUID()) &&
-  IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first(), VI))
+  IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first, VI))
 return false;
 
   return true;
@@ -712,19 +712,18 @@
 /// Compute all the import and export for every module using the Index.
 void llvm::ComputeCrossModuleImport(
 const ModuleSummaryIndex ,
-const StringMap ,
+const DenseMap ,
 function_ref
 isPrevailing,
-StringMap ,
-StringMap ) {
+DenseMap ,
+DenseMap ) {
   // For each module that has function defined, compute the import/export lists.
   for (const